In [1]:
import requests
import datetime
from dateutil.relativedelta import relativedelta
import pandas as pd
import os.path

In [3]:
def get_weather(city, years=10, overwrite=False):

    '''
    Download hourly historical weather for a city from the Open-Meteo APIs.

    The city name is first resolved to coordinates through the geocoding
    endpoint (the first match is used); the archive endpoint is then queried
    from 2013-01-01 up to 8 days before today.

    Parameters
    ----------
    city : str
        Name of the city to look up.
    years : int, default 10
        Currently unused: the start date is pinned to 2013-01-01 rather than
        derived from this value. Kept so existing calls keep working.
    overwrite : bool, default False
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One column per requested weather variable, indexed by the parsed
        ``time`` column.

    Raises
    ------
    KeyError
        If the geocoding service returns no match for ``city``.
    requests.HTTPError
        If either endpoint answers with an HTTP error status.
    '''

    # Hourly variables requested from the archive endpoint
    weather_params = ['temperature_2m','relativehumidity_2m','dewpoint_2m',
                  'apparent_temperature','pressure_msl','surface_pressure',
                  'precipitation','rain','snowfall','cloudcover',
                  'cloudcover_low','cloudcover_mid','cloudcover_high',
                  'shortwave_radiation','direct_radiation','direct_normal_irradiance',
                  'diffuse_radiation','windspeed_10m','windspeed_100m',
                  'winddirection_10m','winddirection_100m','windgusts_10m',
                  'et0_fao_evapotranspiration','weathercode','vapor_pressure_deficit',
                  'soil_temperature_0_to_7cm','soil_temperature_7_to_28cm',
                  'soil_temperature_28_to_100cm','soil_temperature_100_to_255cm',
                  'soil_moisture_0_to_7cm','soil_moisture_7_to_28cm',
                  'soil_moisture_28_to_100cm','soil_moisture_100_to_255cm']

    # Resolve the city name to a latitude / longitude pair
    geocoding_response = requests.get('https://geocoding-api.open-meteo.com/v1/search',
                                      params = {'name': city})
    geocoding_response.raise_for_status()
    matches = geocoding_response.json().get('results')
    if not matches:
        # Fail with an explicit message instead of an opaque KeyError: 'results'
        raise KeyError(f'No geocoding result found for city {city!r}')

    lat = matches[0]['latitude']
    lon = matches[0]['longitude']

    # Date window sent to the archive.
    # NOTE(review): the archive lags behind real time; 8 days is the margin
    # used here - confirm it against the API's actual delay.
    end_date = (datetime.date.today() - relativedelta(days=8)).strftime('%Y-%m-%d')
    # The start is fixed; `years` is deliberately not applied so the returned
    # range stays what existing callers already get.
    start_date = '2013-01-01'

    # Request the hourly history for the resolved coordinates
    archive_response = requests.get('https://archive-api.open-meteo.com/v1/archive',
                       params = {'latitude': lat,
                                'longitude': lon,
                                'start_date': start_date,
                                'end_date': end_date,
                                'hourly': weather_params,
                                'timezone': 'auto'})
    archive_response.raise_for_status()
    weather_response = archive_response.json()

    weather_df = pd.DataFrame(weather_response['hourly'], columns = ['time'] + weather_params)
    # Hourly timestamps look like '2013-01-01T00:00'. A date-only format does not
    # describe them and is rejected by pandas' strict parsing (pandas >= 2.0), so
    # let pandas parse the ISO 8601 strings without an explicit format.
    weather_df['time'] = pd.to_datetime(weather_df['time'])
    weather_df = weather_df.set_index('time')

    print('Done ✅')
    return weather_df

In [8]:
# Fetch the hourly weather history for Amiens and display the resulting frame
weather_df = get_weather(city='Amiens', years=10)
weather_df

Done ✅


Unnamed: 0_level_0,temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,...,weathercode,vapor_pressure_deficit,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-01 00:00:00,8.5,87,6.4,3.5,1003.8,990.8,0.9,0.9,0.0,100,...,53,0.15,7.8,8.1,8.4,9.8,0.430,0.399,0.407,0.366
2013-01-01 01:00:00,8.3,88,6.4,3.8,1003.3,990.3,0.7,0.7,0.0,100,...,53,0.13,7.7,8.1,8.4,9.8,0.429,0.402,0.407,0.366
2013-01-01 02:00:00,8.3,89,6.7,4.5,1002.9,989.9,0.6,0.6,0.0,100,...,53,0.12,7.7,8.1,8.4,9.8,0.428,0.404,0.407,0.366
2013-01-01 03:00:00,8.3,92,7.0,5.1,1002.6,989.6,0.5,0.5,0.0,100,...,53,0.09,7.8,8.1,8.4,9.8,0.428,0.407,0.407,0.366
2013-01-01 04:00:00,8.3,92,7.2,5.3,1002.8,989.8,0.7,0.7,0.0,100,...,53,0.08,7.8,8.1,8.4,9.8,0.430,0.409,0.407,0.366
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-03 19:00:00,4.2,78,0.8,0.5,1026.4,1012.9,0.0,0.0,0.0,22,...,1,0.18,4.8,4.6,5.6,7.9,0.311,0.323,0.358,0.375
2023-03-03 20:00:00,3.3,83,0.7,-0.5,1026.9,1013.3,0.0,0.0,0.0,11,...,0,0.13,3.9,4.6,5.6,7.9,0.311,0.323,0.358,0.375
2023-03-03 21:00:00,2.7,87,0.7,-1.2,1027.5,1013.9,0.0,0.0,0.0,23,...,1,0.10,3.3,4.5,5.6,7.9,0.311,0.323,0.358,0.375
2023-03-03 22:00:00,2.4,88,0.6,-1.5,1027.8,1014.1,0.0,0.0,0.0,38,...,1,0.09,3.0,4.4,5.6,7.9,0.311,0.322,0.358,0.375


In [11]:
def get_energy_production(limit, offset, refine,overwrite=False):

    '''
    Download regional electricity data from the ODRE open-data portal.

    Two JSON exports are fetched with the same query parameters
    (`eco2mix-regional-cons-def` and `eco2mix-regional-tr`), stacked while
    keeping only the columns both exports share, and indexed by a timestamp
    built from the `date` and `heure` columns.

    Parameters
    ----------
    limit : int
        Forwarded as the `limit` query parameter of both exports.
    offset : int
        Forwarded as the `offset` query parameter of both exports.
    refine : str
        Region name; sent as the filter `libelle_region:<refine>`.
    overwrite : bool, default False
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Rows of both exports sorted by, and indexed on, the `time` timestamp.

    Raises
    ------
    requests.HTTPError
        If either export answers with an HTTP error status.
    '''

    # Query parameters shared by both export requests
    params = {'limit': limit, 'offset': offset, 'refine': f'libelle_region:{refine}'}

    # Export endpoints, in the order their rows are stacked
    export_urls = (
        'https://odre.opendatasoft.com/api/v2/catalog/datasets/eco2mix-regional-cons-def/exports/json',
        'https://odre.opendatasoft.com/api/v2/catalog/datasets/eco2mix-regional-tr/exports/json',
    )

    frames = []
    for export_url in export_urls:
        response = requests.get(url=export_url, params=params)
        # Surface HTTP failures here instead of feeding an error payload to pandas
        response.raise_for_status()
        frames.append(pd.DataFrame(response.json()))

    # join="inner" keeps only the columns present in both exports
    energy_production_df = pd.concat(frames, sort=False, join="inner")

    # Build a timestamp from the separate date / time-of-day columns so the
    # frame can be aligned with the weather data on its index
    timestamps = pd.to_datetime(energy_production_df['date'] + ' ' + energy_production_df['heure'])
    energy_production_df.insert(0, "time", timestamps)
    energy_production_df = energy_production_df.sort_values('time')
    energy_production_df = energy_production_df.set_index('time')

    return energy_production_df

In [12]:
# Fetch the Hauts-de-France records from both exports (limit=-1, offset=0) and display them
energy_production_df = get_energy_production(limit=-1, offset=0, refine='Hauts-de-France')
energy_production_df

Unnamed: 0_level_0,code_insee_region,libelle_region,nature,date,heure,date_heure,consommation,thermique,nucleaire,eolien,...,tco_nucleaire,tch_nucleaire,tco_eolien,tch_eolien,tco_solaire,tch_solaire,tco_hydraulique,tch_hydraulique,tco_bioenergies,tch_bioenergies
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-01 00:00:00,32,Hauts-de-France,Données définitives,2013-01-01,00:00,2012-12-31T23:00:00+00:00,,,,,...,,,,,,,,,,
2013-01-01 00:30:00,32,Hauts-de-France,Données définitives,2013-01-01,00:30,2012-12-31T23:30:00+00:00,5989.0,828.0,4499.0,1115.0,...,,,,,,,,,,
2013-01-01 01:00:00,32,Hauts-de-France,Données définitives,2013-01-01,01:00,2013-01-01T00:00:00+00:00,5832.0,830.0,4500.0,1062.0,...,,,,,,,,,,
2013-01-01 01:30:00,32,Hauts-de-France,Données définitives,2013-01-01,01:30,2013-01-01T00:30:00+00:00,5926.0,833.0,4499.0,1014.0,...,,,,,,,,,,
2013-01-01 02:00:00,32,Hauts-de-France,Données définitives,2013-01-01,02:00,2013-01-01T01:00:00+00:00,5695.0,828.0,4497.0,951.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-11 22:45:00,32,Hauts-de-France,Données temps réel,2023-03-11,22:45,2023-03-11T21:45:00+00:00,,,,,...,,,,,,,,,,
2023-03-11 23:00:00,32,Hauts-de-France,Données temps réel,2023-03-11,23:00,2023-03-11T22:00:00+00:00,,,,,...,,,,,,,,,,
2023-03-11 23:15:00,32,Hauts-de-France,Données temps réel,2023-03-11,23:15,2023-03-11T22:15:00+00:00,,,,,...,,,,,,,,,,
2023-03-11 23:30:00,32,Hauts-de-France,Données temps réel,2023-03-11,23:30,2023-03-11T22:30:00+00:00,,,,,...,,,,,,,,,,


In [13]:
# Align weather and energy rows on their shared timestamps (inner join on the index)
merged_df = pd.merge(
    left=weather_df,
    right=energy_production_df,
    how='inner',
    left_index=True,
    right_index=True,
)
merged_df

Unnamed: 0_level_0,temperature_2m,relativehumidity_2m,dewpoint_2m,apparent_temperature,pressure_msl,surface_pressure,precipitation,rain,snowfall,cloudcover,...,tco_nucleaire,tch_nucleaire,tco_eolien,tch_eolien,tco_solaire,tch_solaire,tco_hydraulique,tch_hydraulique,tco_bioenergies,tch_bioenergies
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-01 00:00:00,8.5,87,6.4,3.5,1003.8,990.8,0.9,0.9,0.0,100,...,,,,,,,,,,
2013-01-01 01:00:00,8.3,88,6.4,3.8,1003.3,990.3,0.7,0.7,0.0,100,...,,,,,,,,,,
2013-01-01 02:00:00,8.3,89,6.7,4.5,1002.9,989.9,0.6,0.6,0.0,100,...,,,,,,,,,,
2013-01-01 03:00:00,8.3,92,7.0,5.1,1002.6,989.6,0.5,0.5,0.0,100,...,,,,,,,,,,
2013-01-01 04:00:00,8.3,92,7.2,5.3,1002.8,989.8,0.7,0.7,0.0,100,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-03 19:00:00,4.2,78,0.8,0.5,1026.4,1012.9,0.0,0.0,0.0,22,...,60.05,82.97,16.54,23.24,0.07,1.43,0.03,50.0,1.62,59.51
2023-03-03 20:00:00,3.3,83,0.7,-0.5,1026.9,1013.3,0.0,0.0,0.0,11,...,60.84,82.93,15.75,21.82,0.00,0.00,0.03,50.0,1.63,59.02
2023-03-03 21:00:00,2.7,87,0.7,-1.2,1027.5,1013.9,0.0,0.0,0.0,23,...,66.66,82.97,17.23,21.80,0.00,0.00,0.03,50.0,1.8,59.51
2023-03-03 22:00:00,2.4,88,0.6,-1.5,1027.8,1014.1,0.0,0.0,0.0,38,...,74.15,82.97,18.50,21.04,0.00,0.00,0.03,50.0,1.98,59.02


In [16]:
# Persist the merged frame (index included) to a CSV file in the working directory
merged_df.to_csv(path_or_buf='dataframe.csv')

In [None]:
def get_weather(city, years=10, overwrite=False):
    '''
    Download the hourly `temperature_2m` history for a city from Open-Meteo.

    NOTE: this redefines the `get_weather` declared earlier in the notebook;
    once this cell runs, it replaces that version and only the temperature
    column is fetched.

    Parameters
    ----------
    city : str
        Name of the city to look up (the first geocoding match is used).
    years : int, default 10
        Currently unused: the start date is pinned to 2013-01-01.
    overwrite : bool, default False
        Currently unused; accepted so calls written for the earlier
        definition keep working.

    Returns
    -------
    pandas.DataFrame
        A `temperature_2m` column indexed by the parsed `time` column.

    Raises
    ------
    KeyError
        If the geocoding service returns no match for `city`.
    requests.HTTPError
        If either endpoint answers with an HTTP error status.
    '''
    weather_params = ['temperature_2m']

    # Resolve the city name to coordinates
    geocoding_response = requests.get('https://geocoding-api.open-meteo.com/v1/search',
                                      params = {'name': city})
    geocoding_response.raise_for_status()
    matches = geocoding_response.json().get('results')
    if not matches:
        # Fail with an explicit message instead of an opaque KeyError: 'results'
        raise KeyError(f'No geocoding result found for city {city!r}')
    lat = matches[0]['latitude']
    lon = matches[0]['longitude']

    # Fixed start; the end stops 8 days before today
    end_date = (datetime.date.today() - relativedelta(days=8)).strftime('%Y-%m-%d')
    start_date = '2013-01-01'

    archive_response = requests.get('https://archive-api.open-meteo.com/v1/archive',
                       params = {'latitude': lat,
                                'longitude': lon,
                                'start_date': start_date,
                                'end_date': end_date,
                                'hourly': weather_params,
                                'timezone': 'auto'})
    archive_response.raise_for_status()
    weather_response = archive_response.json()

    weather_df = pd.DataFrame(weather_response['hourly'], columns = ['time'] + weather_params)
    # Timestamps look like '2013-01-01T00:00'; a date-only format is rejected by
    # pandas' strict parsing (pandas >= 2.0), so parse the ISO 8601 strings as-is.
    weather_df['time'] = pd.to_datetime(weather_df['time'])
    weather_df = weather_df.set_index('time')
    return weather_df

def get_energy_production(limit, offset, refine, overwrite=False):
    '''
    Download regional electricity data from the ODRE open-data portal.

    NOTE: this redefines the `get_energy_production` declared earlier in the
    notebook; once this cell runs, it replaces that version.

    Two JSON exports are fetched with the same query parameters
    (`eco2mix-regional-cons-def` and `eco2mix-regional-tr`), stacked while
    keeping only the columns both exports share, and indexed by a timestamp
    built from the `date` and `heure` columns.

    Parameters
    ----------
    limit : int
        Forwarded as the `limit` query parameter of both exports.
    offset : int
        Forwarded as the `offset` query parameter of both exports.
    refine : str
        Region name; sent as the filter `libelle_region:<refine>`.
    overwrite : bool, default False
        Currently unused; accepted so calls written for the earlier
        definition keep working.

    Returns
    -------
    pandas.DataFrame
        Rows of both exports sorted by, and indexed on, the `time` timestamp.

    Raises
    ------
    requests.HTTPError
        If either export answers with an HTTP error status.
    '''
    params = {'limit': limit, 'offset': offset, 'refine': f'libelle_region:{refine}'}

    # Export endpoints, in the order their rows are stacked
    export_urls = (
        'https://odre.opendatasoft.com/api/v2/catalog/datasets/eco2mix-regional-cons-def/exports/json',
        'https://odre.opendatasoft.com/api/v2/catalog/datasets/eco2mix-regional-tr/exports/json',
    )

    frames = []
    for export_url in export_urls:
        response = requests.get(url=export_url, params=params)
        # Surface HTTP failures here instead of feeding an error payload to pandas
        response.raise_for_status()
        frames.append(pd.DataFrame(response.json()))

    # join="inner" keeps only the columns present in both exports
    energy_production_df = pd.concat(frames, sort=False, join="inner")

    # Combine the date and time-of-day columns into one timestamp index
    timestamps = pd.to_datetime(energy_production_df['date'] + ' ' + energy_production_df['heure'])
    energy_production_df.insert(0, "time", timestamps)
    energy_production_df = energy_production_df.sort_values('time')
    energy_production_df = energy_production_df.set_index('time')
    return energy_production_df

def merge_weather_energy_df(weather_df, energy_production_df):
    # Combine the weather and energy frames by matching their indexes
    # (left_index / right_index), using pd.merge's default join type.
    # NOTE(review): no return statement is visible in this view - if the
    # function really ends here it returns None; confirm whether
    # `return merged_df` is missing.
    merged_df = pd.merge(weather_df, energy_production_df, left_index=True, right_index=True)