In [1]:
import os
import time

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

In [2]:
# Build the Open-Meteo API client on top of an HTTP session that caches
# responses in the local '.cache' store and retries failed requests.
# expire_after=-1 keeps cached responses indefinitely (see requests-cache docs).
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
# Up to 5 retries with a backoff factor of 0.2 between attempts.
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [3]:
# Open-Meteo archive endpoint that every weather request in this notebook is sent to.
# The weather variables themselves are listed in the request parameters of the fetch cell below;
# the order of variables in hourly or daily is important to assign them correctly when reading the response.
url = "https://archive-api.open-meteo.com/v1/archive"

In [4]:
# One (name, latitude, longitude) row per country; expanded below into the
# list of {"name", "lat", "lon"} dicts that the rest of the notebook iterates over.
countries = [
    {"name": name, "lat": lat, "lon": lon}
    for name, lat, lon in (
        ("Slovenia", 46.1512, 14.9955),
        ("United Kingdom", 55.3781, -3.4360),
        ("France", 46.6034, 1.8883),
        ("Greece", 39.0742, 21.8243),
        ("Estonia", 58.5953, 25.0136),
        ("Azerbaijan", 40.1431, 47.5769),
        ("Czechia", 49.8175, 15.4729),
        ("Costa Rica", 9.7489, -83.7534),
        ("Maldives", 3.2028, 73.2207),
        ("Mauritius", -20.3484, 57.5522),
        ("Oman", 21.4735, 55.9754),
        ("Nigeria", 9.0820, 8.6753),
        ("Uruguay", -32.5228, -55.7658),
        ("Malawi", -13.2543, 34.3015),
        ("Malta", 35.9375, 14.3754),
        ("Kazakhstan", 48.0196, 66.9237),
        ("Kenya", -1.2864, 36.8172),
        ("Mauritania", 20.2540, -10.9517),
        ("Mongolia", 46.8625, 103.8467),
        ("Viet Nam", 14.0583, 108.2772),
        ("Argentina", -38.4161, -63.6167),
        ("Armenia", 40.0691, 45.0382),
        ("Barbados", 13.1939, -59.5432),
        ("Belgium", 50.8503, 4.3517),
        ("Bolivia (Plurinational State of)", -16.2902, -63.5887),
        ("Bosnia Herzegovina", 43.9159, 17.6791),
        ("Canada", 56.1304, -106.3468),
        ("Chile", -35.6751, -71.5430),
        ("Colombia", 4.5709, -74.2973),
        ("Croatia", 45.1000, 15.2000),
        ("Denmark", 56.2639, 9.5018),
        ("El Salvador", 13.7942, -88.8965),
        ("Guatemala", 15.7835, -90.2308),
        ("Iceland", 64.9631, -19.0208),
        ("Indonesia", -0.7893, 113.9213),
        ("Ireland", 53.4129, -8.2439),
        ("Israel", 31.0461, 34.8516),
        ("Italy", 41.8719, 12.5674),
        ("Japan", 36.2048, 138.2529),
        ("Lithuania", 55.1694, 23.8813),
        ("Luxembourg", 49.8153, 6.1296),
        ("Malaysia", 4.2105, 101.9758),
        ("Montserrat", 16.7425, -62.1874),
        ("Morocco", 31.7917, -7.0926),
        ("Netherlands", 52.1326, 5.2913),
        ("New Zealand", -40.9006, 174.8860),
        ("Norway", 60.4720, 8.4689),
        ("Pakistan", 30.3753, 69.3451),
        ("USA", 37.0902, -95.7129),
        ("Uzbekistan", 41.3775, 64.5853),
    )
]

In [5]:
# Per-country results, filled in by the fetch loop: {country name: {"daily": DataFrame}}
weather_data = {}

# Daily variables read by default, in the order they are requested from the API.
DAILY_VARIABLES = (
    "temperature_2m_max",
    "temperature_2m_min",
    "temperature_2m_mean",
    "precipitation_sum",
    "rain_sum",
    "wind_speed_10m_max",
    "wind_gusts_10m_max",
)


def extract_daily_dataframe(response, variables=DAILY_VARIABLES):
    """Turn the daily section of one Open-Meteo response into a DataFrame.

    Parameters
    ----------
    response
        A single response object (one element of what ``openmeteo.weather_api``
        returns). Only its ``Daily()`` section is read.
    variables : sequence of str, optional
        Column names for the daily variables, in the same order in which they
        were requested. The variable at position ``k`` of the response is stored
        under ``variables[k]``. Defaults to the seven variables in
        ``DAILY_VARIABLES``.

    Returns
    -------
    pandas.DataFrame
        A ``date`` column (UTC timestamps) followed by one column per entry of
        ``variables``.
    """
    daily_data = response.Daily()

    # Rebuild the time axis from the response's start, end and step (all in seconds).
    # inclusive="left" leaves the end timestamp itself out of the range.
    daily_time = pd.date_range(
        start=pd.to_datetime(daily_data.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily_data.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily_data.Interval()),
        inclusive="left",
    )

    # Variables are addressed by position, so the column name comes from the
    # matching position in `variables`.
    columns = {"date": daily_time}
    for position, column_name in enumerate(variables):
        columns[column_name] = daily_data.Variables(position).ValuesAsNumpy()

    return pd.DataFrame(columns)

# Request settings shared by every country; only the coordinates differ per call.
# The order of the "daily" entries must match the order in which
# extract_daily_dataframe reads the variables back out of the response.
common_params = {
    "start_date": "2015-01-01",
    "end_date": "2024-12-31",
    "daily": [
        "temperature_2m_max",
        "temperature_2m_min",
        "temperature_2m_mean",
        "precipitation_sum",
        "rain_sum",
        "wind_speed_10m_max",
        "wind_gusts_10m_max",
    ],
    "timezone": "UTC",  # or a local timezone, e.g. "America/Los_Angeles"
}
last_index = len(countries) - 1

for i, country in enumerate(countries):
    params = {
        "latitude": country["lat"],
        "longitude": country["lon"],
        **common_params,
    }

    # Only the first element of the returned responses is used for this country.
    response = openmeteo.weather_api(url, params=params)[0]

    # Store the resulting dataframe in the dict, keyed by country name.
    weather_data[country["name"]] = {"daily": extract_daily_dataframe(response)}

    # Wait 15 seconds between requests (presumably to respect the API rate limit);
    # there is nothing to wait for after the final country.
    if i != last_index:
        time.sleep(15)

print(weather_data)

{'Slovenia': {'daily':                           date  temperature_2m_max  temperature_2m_min  \
0    2015-01-01 00:00:00+00:00           -1.076000          -20.425999   
1    2015-01-02 00:00:00+00:00            3.624000           -9.676000   
2    2015-01-03 00:00:00+00:00            2.624000           -5.026000   
3    2015-01-04 00:00:00+00:00            4.674000           -5.926000   
4    2015-01-05 00:00:00+00:00            4.924000           -5.926000   
...                        ...                 ...                 ...   
3648 2024-12-27 00:00:00+00:00            5.799000           -3.701000   
3649 2024-12-28 00:00:00+00:00            7.349000           -1.851000   
3650 2024-12-29 00:00:00+00:00            8.199000           -0.901000   
3651 2024-12-30 00:00:00+00:00            7.399000           -0.951000   
3652 2024-12-31 00:00:00+00:00           11.299001           -1.501000   

      temperature_2m_mean  precipitation_sum  rain_sum  wind_speed_10m_max  \
0         

In [6]:
# Build one long table from the per-country daily frames.
# Each frame is copied via .assign (the originals in weather_data stay untouched)
# and tagged with the country name -- stored in the "port" column -- plus its coordinates.
dfs = [
    weather_data[country_info["name"]]["daily"].assign(
        port=country_info["name"],
        latitude=country_info["lat"],
        longitude=country_info["lon"],
    )
    for country_info in countries
]

# Stack all countries under a single fresh 0..n-1 index.
combined_df = pd.concat(dfs, ignore_index=True)

# Quick look at the first rows of the combined table.
print(combined_df.head())

                       date  temperature_2m_max  temperature_2m_min  \
0 2015-01-01 00:00:00+00:00              -1.076          -20.425999   
1 2015-01-02 00:00:00+00:00               3.624           -9.676000   
2 2015-01-03 00:00:00+00:00               2.624           -5.026000   
3 2015-01-04 00:00:00+00:00               4.674           -5.926000   
4 2015-01-05 00:00:00+00:00               4.924           -5.926000   

   temperature_2m_mean  precipitation_sum  rain_sum  wind_speed_10m_max  \
0            -8.350999                0.0       0.0            5.411986   
1            -2.130167                0.4       0.4            7.421590   
2            -0.798917                0.0       0.0            7.568566   
3            -0.426000                0.0       0.0           15.042659   
4            -1.009333                0.0       0.0            9.885262   

   wind_gusts_10m_max      port  latitude  longitude  
0           15.119999  Slovenia   46.1512    14.9955  
1           

In [7]:
# Preview the first rows of the combined table (displayed as the cell output).
combined_df.head()

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,temperature_2m_mean,precipitation_sum,rain_sum,wind_speed_10m_max,wind_gusts_10m_max,port,latitude,longitude
0,2015-01-01 00:00:00+00:00,-1.076,-20.425999,-8.350999,0.0,0.0,5.411986,15.119999,Slovenia,46.1512,14.9955
1,2015-01-02 00:00:00+00:00,3.624,-9.676,-2.130167,0.4,0.4,7.42159,17.280001,Slovenia,46.1512,14.9955
2,2015-01-03 00:00:00+00:00,2.624,-5.026,-0.798917,0.0,0.0,7.568566,25.559999,Slovenia,46.1512,14.9955
3,2015-01-04 00:00:00+00:00,4.674,-5.926,-0.426,0.0,0.0,15.042659,35.279999,Slovenia,46.1512,14.9955
4,2015-01-05 00:00:00+00:00,4.924,-5.926,-1.009333,0.0,0.0,9.885262,28.440001,Slovenia,46.1512,14.9955


In [8]:
# Size check: (number of rows, number of columns) of the combined table.
combined_df.shape

(182650, 11)

In [9]:
# Save the combined table into the project's "02 Data/Prepared Data" folder.
# The project root defaults to the original location; set the SHIPPING_PROJECT_DIR
# environment variable to run the notebook from another machine or checkout
# without editing this cell.
project_dir = os.environ.get("SHIPPING_PROJECT_DIR", "/Users/bobbi/Careerfoundry/Shipping_project")
output_path = os.path.join(project_dir, "02 Data", "Prepared Data", "country_weather_2.pkl")
combined_df.to_pickle(output_path)

In [10]:
# List the distinct values of the "port" column, which holds the country names
# assigned when the per-country frames were combined.
combined_df['port'].unique()

array(['Slovenia', 'United Kingdom', 'France', 'Greece', 'Estonia',
       'Azerbaijan', 'Czechia', 'Costa Rica', 'Maldives', 'Mauritius',
       'Oman', 'Nigeria', 'Uruguay', 'Malawi', 'Malta', 'Kazakhstan',
       'Kenya', 'Mauritania', 'Mongolia', 'Viet Nam', 'Argentina',
       'Armenia', 'Barbados', 'Belgium',
       'Bolivia (Plurinational State of)', 'Bosnia Herzegovina', 'Canada',
       'Chile', 'Colombia', 'Croatia', 'Denmark', 'El Salvador',
       'Guatemala', 'Iceland', 'Indonesia', 'Ireland', 'Israel', 'Italy',
       'Japan', 'Lithuania', 'Luxembourg', 'Malaysia', 'Montserrat',
       'Morocco', 'Netherlands', 'New Zealand', 'Norway', 'Pakistan',
       'USA', 'Uzbekistan'], dtype=object)