In [1]:
import cdsapi
import netCDF4 as nc
import numpy as np
import pandas as pd
import xarray as xr
import pickle

In [None]:
# Madrid: hourly ERA5-Land fields for every calendar day of 2021 and 2022.
# NOTE(review): later cells read 'data.nc' from another folder — presumably the
# downloaded archive is unpacked and renamed by hand; confirm.

c = cdsapi.Client()

madrid_request = {
    'variable': [
        '10m_u_component_of_wind',
        '10m_v_component_of_wind',
        '2m_temperature',
        'surface_net_solar_radiation',
        'total_precipitation',
    ],
    'year': ['2021', '2022'],
    # Zero-padded month, day and hour labels: '01'..'12', '01'..'31', '00:00'..'23:00'.
    'month': [f'{month:02d}' for month in range(1, 13)],
    'day': [f'{day:02d}' for day in range(1, 32)],
    'time': [f'{hour:02d}:00' for hour in range(24)],
    # Bounding box; presumably ordered north, west, south, east — confirm against the CDS docs.
    'area': [40.5, -3.8, 40.4, -3.6],
    'format': 'netcdf.zip',
}

c.retrieve('reanalysis-era5-land', madrid_request, 'download.netcdf.zip')

In [None]:
# Bordeaux: hourly ERA5-Land fields for 21-27 November 2016.

c = cdsapi.Client()

bordeaux_request = {
    'variable': [
        '10m_u_component_of_wind',
        '10m_v_component_of_wind',
        '2m_temperature',
        'surface_net_solar_radiation',
        'total_precipitation',
    ],
    'year': ['2016'],
    'month': ['11'],
    'day': [str(day) for day in range(21, 28)],
    # Every full hour of the day, '00:00' through '23:00'.
    'time': [f'{hour:02d}:00' for hour in range(24)],
    # Both corners of the box coincide, so the request targets a single location.
    'area': [44.8, -0.6, 44.8, -0.6],
    'format': 'netcdf.zip',
}

c.retrieve('reanalysis-era5-land', bordeaux_request, 'download.netcdf.zip')

In [None]:
# Constance: hourly ERA5-Land fields for 13-19 February 2017.

c = cdsapi.Client()

constance_request = {
    'variable': [
        '10m_u_component_of_wind',
        '10m_v_component_of_wind',
        '2m_temperature',
        'surface_net_solar_radiation',
        'total_precipitation',
    ],
    'year': ['2017'],
    # Zero-padded like the months in the other download requests of this notebook
    # (the original passed '2').
    'month': ['02'],
    'day': [str(day) for day in range(13, 20)],
    # Every full hour of the day, '00:00' through '23:00'.
    'time': [f'{hour:02d}:00' for hour in range(24)],
    # Both corners of the box coincide, so the request targets a single location.
    'area': [47.7, 9.2, 47.7, 9.2],
    'format': 'netcdf.zip',
}

c.retrieve('reanalysis-era5-land', constance_request, 'download.netcdf.zip')

In [2]:
def dataframe_generator(filenames, base_folder='C:/Users/Mikel/Desktop/'):
    '''Build a single pandas DataFrame from one or more atmospheric data files.

    Each file is opened with xarray, converted to a DataFrame, and the per-file
    frames are concatenated in the order the filenames are given.

    Parameters
    ----------
    filenames : list of str
        Names of the files to load, relative to ``base_folder``.
    base_folder : str, optional
        Folder that contains the files. Defaults to the location that was
        previously hard-coded, so existing calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The concatenation of all per-file frames, or an empty DataFrame when
        ``filenames`` is empty.
    '''
    import os  # local import keeps this cell runnable on its own

    frames = []
    for file in filenames:
        file_path = os.path.join(base_folder, file)
        # The context manager closes the file handle once the data has been
        # copied into the DataFrame.
        with xr.open_dataset(file_path) as ds:
            frames.append(ds.to_dataframe())

    # pd.concat raises on an empty list, so keep returning an empty frame instead.
    if not frames:
        return pd.DataFrame()

    # Concatenate once at the end instead of re-copying the growing frame on
    # every iteration.
    return pd.concat(frames)

In [3]:
# Build the main frame from the Madrid file and discard every row that has a missing value.

df = dataframe_generator(['data.nc']).dropna()

In [4]:
# Given their geographical distribution, the stations could be matched to either
# the (40.4, -3.7) or the (40.5, -3.7) grid point (lat, long). Split them at the
# midpoint latitude (40.45) to see how many fall on each side.

stations = pd.read_csv('C:/Users/Mikel/Desktop/estaciones.csv')
below_midpoint = stations['latitud'].lt(40.45)
# 'A' marks stations below the midpoint latitude, 'B' all the others.
stations['sector'] = np.where(below_midpoint, 'A', 'B')
stations['sector'].value_counts()

A    156
Name: sector, dtype: int64

In [5]:
# All stations fall in sector A, so they are tied to the (40.4, -3.7) grid point.
# The coordinates are matched exactly against the literals below
# (presumably the float32 values stored in the file — confirm).
# The hourly series is then re-indexed to 15-minute steps and the inserted
# rows are filled by time-based interpolation.

df = (
    df.query('longitude == -3.700000047683716 and latitude == 40.400001525878906')
    .reset_index()
    .loc[:, ['time', 'u10', 'v10', 't2m', 'ssr', 'tp']]
    .set_index('time')
    .asfreq(freq='15min')
    .interpolate(method='time')
)

In [6]:
# Write the resampled Madrid frame to CSV.

filepath = 'C:/Users/Mikel/Desktop/Archivos/Estudios/Ciencia de Datos/TFM/Tercera iteración/meteo.csv'
df.to_csv(path_or_buf=filepath)

In [7]:
# Transfer-learning datasets: load each city's file and drop rows with missing values.

df_bordeaux = dataframe_generator(['data_bordeaux.nc']).dropna()

df_constance = dataframe_generator(['data_const.nc']).dropna()

In [8]:
# Resampling

def _resample_to_15min(frame):
    '''Return the meteorological columns of ``frame`` on a 15-minute time grid.

    The frame's index levels are turned back into columns, only ``time`` and
    the five meteorological variables are kept, ``time`` becomes the index,
    the series is re-indexed at 15-minute frequency and the inserted rows are
    filled by time-based interpolation.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose index (or columns) provide ``time`` and which contains the
        columns ``u10``, ``v10``, ``t2m``, ``ssr`` and ``tp``.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``time`` at 15-minute steps.
    '''
    meteo_columns = ['u10', 'v10', 't2m', 'ssr', 'tp']
    return (
        frame.reset_index()[['time'] + meteo_columns]
        .set_index('time')
        # NOTE(review): asfreq needs unique timestamps — presumably each file
        # holds a single grid point; confirm.
        .asfreq(freq='15min')
        .interpolate(method='time')
    )


# Same pipeline for both cities, applied through one helper instead of two copies.
df_bordeaux = _resample_to_15min(df_bordeaux)
df_constance = _resample_to_15min(df_constance)

In [9]:
# Write both resampled transfer-learning frames to CSV, one file per city.

exports = [
    (df_bordeaux, 'C:/Users/Mikel/Desktop/Archivos/Estudios/Ciencia de Datos/TFM/Tercera iteración/meteo_bordeaux.csv'),
    (df_constance, 'C:/Users/Mikel/Desktop/Archivos/Estudios/Ciencia de Datos/TFM/Tercera iteración/meteo_constance.csv'),
]
for frame, filepath in exports:
    frame.to_csv(filepath)