## METAR

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xarray as xr

In [53]:
metar_data = pd.read_csv('data/metar_msg_overview.csv')

In [54]:
# Convert issued_at to datetime
metar_data['issued_at'] = pd.to_datetime(metar_data['issued_at'])

In [55]:
metar_data

Unnamed: 0,airport_identifier,issued_at,metar,metartype
0,ENDR,2021-01-01 00:20:00,ENDR 010020Z 14007KT 03/M02 Q1006=,AUTO
1,ENZV,2021-01-01 00:20:00,ENZV 010020Z 13006KT 9999 SCT026 BKN035 03/01 ...,
2,ENTC,2021-01-01 00:20:00,ENTC 010020Z 21015KT CAVOK 02/M03 Q1013 RMK WI...,
3,ENSB,2021-01-01 00:20:00,ENSB 010020Z 24004KT 9999 -SN FEW009 BKN033 M0...,
4,ENGM,2021-01-01 00:20:00,ENGM 010020Z 36006KT 9999 4900E -SN FEW009 OVC...,
...,...,...,...,...
3275708,ENLA,2023-12-31 23:50:00,ENLA 312350Z 11021KT 9999NDV BKN036/// 07/06 Q...,AUTO
3275709,ENUS,2023-12-31 23:50:00,ENUS 312350Z 10033G43KT 9999NDV BKN022/// 00/M...,AUTO
3275710,ENOL,2023-12-31 23:50:00,ENOL 312350Z 12029G41KT 9999 DRSN NSC M03/M09 ...,
3275711,ENWV,2023-12-31 23:50:00,ENWV 312350Z 14019KT 9999NDV SCT014/// OVC037/...,AUTO


### Round to each whole hour for METAR and then remove duplicates

In [56]:
metar_data_xarray = xr.Dataset.from_dataframe(metar_data)

In [57]:
metar_data_xarray

In [58]:
metar_data_xarray['rounded_issued_at'] = metar_data_xarray['issued_at'].dt.round('H')

In [59]:
metar_data_xarray.head(200)

In [60]:
metar_data_pandas = metar_data_xarray.to_dataframe()

In [61]:
metar_data_pandas.head(100)

Unnamed: 0_level_0,airport_identifier,issued_at,metar,metartype,rounded_issued_at
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,ENDR,2021-01-01 00:20:00,ENDR 010020Z 14007KT 03/M02 Q1006=,AUTO,2021-01-01 00:00:00
1,ENZV,2021-01-01 00:20:00,ENZV 010020Z 13006KT 9999 SCT026 BKN035 03/01 ...,,2021-01-01 00:00:00
2,ENTC,2021-01-01 00:20:00,ENTC 010020Z 21015KT CAVOK 02/M03 Q1013 RMK WI...,,2021-01-01 00:00:00
3,ENSB,2021-01-01 00:20:00,ENSB 010020Z 24004KT 9999 -SN FEW009 BKN033 M0...,,2021-01-01 00:00:00
4,ENGM,2021-01-01 00:20:00,ENGM 010020Z 36006KT 9999 4900E -SN FEW009 OVC...,,2021-01-01 00:00:00
...,...,...,...,...,...
95,ENAL,2021-01-01 01:20:00,ENAL 010120Z 08005KT 9999 FEW027/// SCT040/// ...,AUTO,2021-01-01 01:00:00
96,ENCN,2021-01-01 01:20:00,ENCN 010120Z 04003KT 2900 OVC059/// M01/M02 Q1...,AUTO,2021-01-01 01:00:00
97,ENHD,2021-01-01 01:20:00,ENHD 010120Z 10007KT 8000 NCD 02/01 Q1002=,AUTO,2021-01-01 01:00:00
98,ENRO,2021-01-01 01:20:00,ENRO 010120Z 23003KT 6000NDV -SN BKN003/// OVC...,AUTO,2021-01-01 01:00:00


In [62]:
metar_data_pandas= metar_data_pandas.drop_duplicates(subset=['airport_identifier', 'rounded_issued_at']).reset_index(drop=True)

In [63]:
metar_data_pandas.head(100)

Unnamed: 0,airport_identifier,issued_at,metar,metartype,rounded_issued_at
0,ENDR,2021-01-01 00:20:00,ENDR 010020Z 14007KT 03/M02 Q1006=,AUTO,2021-01-01 00:00:00
1,ENZV,2021-01-01 00:20:00,ENZV 010020Z 13006KT 9999 SCT026 BKN035 03/01 ...,,2021-01-01 00:00:00
2,ENTC,2021-01-01 00:20:00,ENTC 010020Z 21015KT CAVOK 02/M03 Q1013 RMK WI...,,2021-01-01 00:00:00
3,ENSB,2021-01-01 00:20:00,ENSB 010020Z 24004KT 9999 -SN FEW009 BKN033 M0...,,2021-01-01 00:00:00
4,ENGM,2021-01-01 00:20:00,ENGM 010020Z 36006KT 9999 4900E -SN FEW009 OVC...,,2021-01-01 00:00:00
...,...,...,...,...,...
95,ENDU,2021-01-01 01:50:00,ENDU 010150Z 11013KT 9999 NCD 00/M05 Q1013 RMK...,AUTO,2021-01-01 02:00:00
96,ENGM,2021-01-01 01:50:00,ENGM 010150Z 36006KT 9999 6000 R19R/P2000N R01...,,2021-01-01 02:00:00
97,ENVA,2021-01-01 01:50:00,ENVA 010150Z 12008KT CAVOK M07/M08 Q1007 RMK W...,,2021-01-01 02:00:00
98,ENAL,2021-01-01 01:50:00,ENAL 010150Z 09006KT 9999 BKN025/// M01/M02 Q1...,AUTO,2021-01-01 02:00:00


In [64]:
# Sort data based on airport and timestamp
metar_data_pandas.sort_values(['airport_identifier', 'rounded_issued_at'], inplace=True)

# Set airport and issued_at as index
metar_data_pandas.set_index(['airport_identifier', 'rounded_issued_at'], inplace=True)

# Group by airport
metar_data_pandas.groupby(['airport_identifier'])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x14e1429df5b0>

In [65]:
# Replace NaN with MANUAL
metar_data_pandas['metartype'].fillna('MANUAL', inplace=True)

In [66]:
metar_data_pandas

Unnamed: 0_level_0,Unnamed: 1_level_0,issued_at,metar,metartype
airport_identifier,rounded_issued_at,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ENAL,2021-01-01 00:00:00,2021-01-01 00:20:00,ENAL 010020Z 08005KT 9999 SCT024/// BKN037/// ...,AUTO
ENAL,2021-01-01 01:00:00,2021-01-01 00:50:00,ENAL 010050Z 09006KT 9999 FEW025/// 00/M02 Q1006=,AUTO
ENAL,2021-01-01 02:00:00,2021-01-01 01:50:00,ENAL 010150Z 09006KT 9999 BKN025/// M01/M02 Q1...,AUTO
ENAL,2021-01-01 03:00:00,2021-01-01 02:50:00,ENAL 010250Z 10005KT 9999 OVC029/// 00/M02 Q1006=,AUTO
ENAL,2021-01-01 04:00:00,2021-01-01 03:50:00,ENAL 010350Z VRB03KT 9999 OVC024/// 02/M01 Q1006=,AUTO
...,...,...,...,...
ENZV,2023-12-31 20:00:00,2023-12-31 19:50:00,ENZV 311950Z 10017KT CAVOK 05/M04 Q0994=,MANUAL
ENZV,2023-12-31 21:00:00,2023-12-31 20:50:00,ENZV 312050Z 10018KT CAVOK 05/M04 Q0994=,MANUAL
ENZV,2023-12-31 22:00:00,2023-12-31 21:50:00,ENZV 312150Z 10022KT CAVOK 05/M05 Q0994=,MANUAL
ENZV,2023-12-31 23:00:00,2023-12-31 22:50:00,ENZV 312250Z 10021KT CAVOK 05/M04 Q0994=,MANUAL


## MERGE TEST

In [67]:
# Import netcdf-file
filename = '/lustre/storeB/immutable/archive/projects/metproduction/meps/2023/10/10/meps_lagged_6_h_subset_2_5km_20231010T00Z.nc'
data = xr.open_dataset(filename)

airport_file= pd.read_csv('data/airports_overview.csv')
airport_identifiers = airport_file['airport_identifier']

In [19]:
# Function for calculating the nearest latitude and longitude coordinates based on pre-defined target values
# 

def find_nearest_coordinates(airport_identifiers, target_lat, target_lon):
    latitude_values = data['latitude'].values
    longitude_values = data['longitude'].values

    # Calculate the euclidean distance from the target coordinates
    distances = np.sqrt((latitude_values - target_lat)**2 + (longitude_values - target_lon)**2)

    # Find indices of the minimum distance
    nearest_index = np.unravel_index(distances.argmin(), distances.shape)

    # Extract corresponding y and x indices
    nearest_y_index = nearest_index[0]
    nearest_x_index = nearest_index[1]

    # Extract data using found indices
    print(f'For airport {airport_identifiers}: Successfully extracted (y,x) for target_latitude = {target_lat} and target_longitude: {target_lon}')
    return nearest_y_index, nearest_x_index


In [20]:
# Function for locating the value of the corresponding x- and y-values based on index found from find_nearest_coordinates

def extract_vals_to_df(data_param, y_index, x_index):
    
    data_param = data_param.sel(   
        y = data['y'][y_index],   
        x = data['x'][x_index],
        method='nearest'
     )
    data_param = data_param.to_dataframe()
    return data_param

In [21]:
airport_location = pd.read_csv('data/airport_positions.csv')
airport_location

Unnamed: 0,airport_identifier,name,position
0,ENAL,ALESUND/VIGRA RWY 07/25,"(6256,611)"
1,ENAN,ANDOYA/ANDENES RWY 03/21 14/32,"(6929,1614)"
2,ENAS,NY-ALESUND/HAMNERABBEN RWY 12/30,"(7892,1187)"
3,ENAT,ALTA RWY 11/29,"(6997,2337)"
4,ENBJ,BJORNOYA,"(7450,1908)"
...,...,...,...
85,ENVD,VADSO RWY 08/26,"(7006,2984)"
86,ENWV,VALHALL A,"(5627,339)"
87,ENVR,VAEROY,"(6765,1272)"
88,ENXW,GRANE,"(5916,248)"


In [22]:
# Remove parenthesis -> split lon and lat values -> divide by 100 to obtain correct values
airport_location[['latitude', 'longitude']] = airport_location['position'].str.replace(r'[\(\)]', '').str.split(',', expand=True)
airport_location[['latitude', 'longitude']] = airport_location[['latitude', 'longitude']].astype('float') / 100

  airport_location[['latitude', 'longitude']] = airport_location['position'].str.replace(r'[\(\)]', '').str.split(',', expand=True)


In [23]:
# Function for processing each row from each airport through earlier defined functions

def process_airport_row(row):
    target_lat = row['latitude']
    target_lon = row['longitude']
    airport = row['airport_identifier']

    nearest_y_index, nearest_x_index = find_nearest_coordinates(airport, target_lat, target_lon)
    
    result_df = extract_vals_to_df(data['air_temperature_2m'], nearest_y_index, nearest_x_index)
    
    return result_df

In [25]:
test_dataset = airport_location.head(2)
test_dataset

Unnamed: 0,airport_identifier,name,position,latitude,longitude
0,ENAL,ALESUND/VIGRA RWY 07/25,"(6256,611)",62.56,6.11
1,ENAN,ANDOYA/ANDENES RWY 03/21 14/32,"(6929,1614)",69.29,16.14


In [37]:
# Applying to dataframe
result = test_dataset.apply(process_airport_row, axis=1)

# Concatenate the results into a final DF
final_result = pd.concat(result.tolist(), keys=test_dataset['airport_identifier'])

final_result

For airport ENAL: Successfully extracted (y,x) for target_latitude = 62.56 and target_longitude: 6.11
For airport ENAN: Successfully extracted (y,x) for target_latitude = 69.29 and target_longitude: 16.14


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,x,y,longitude,latitude,air_temperature_2m
airport_identifier,time,height1,ensemble_member,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ENAL,2023-10-10 00:00:00,2.0,0,-455084.062500,-50017.90625,6.089178,62.565422,282.125488
ENAL,2023-10-10 00:00:00,2.0,1,-455084.062500,-50017.90625,6.089178,62.565422,282.072205
ENAL,2023-10-10 00:00:00,2.0,2,-455084.062500,-50017.90625,6.089178,62.565422,282.544434
ENAL,2023-10-10 00:00:00,2.0,3,-455084.062500,-50017.90625,6.089178,62.565422,281.222900
ENAL,2023-10-10 00:00:00,2.0,4,-455084.062500,-50017.90625,6.089178,62.565422,281.682007
...,...,...,...,...,...,...,...,...
ENAN,2023-10-12 13:00:00,2.0,25,44915.945312,667482.12500,16.135429,69.287653,278.767792
ENAN,2023-10-12 13:00:00,2.0,26,44915.945312,667482.12500,16.135429,69.287653,280.006653
ENAN,2023-10-12 13:00:00,2.0,27,44915.945312,667482.12500,16.135429,69.287653,279.102173
ENAN,2023-10-12 13:00:00,2.0,28,44915.945312,667482.12500,16.135429,69.287653,278.592926


In [80]:
test_dataset = final_result

In [81]:
test_dataset = test_dataset.reset_index(level=['height1', 'ensemble_member'])

In [82]:
test_dataset = test_dataset.rename_axis(index={'time': 'rounded_issued_at'})

In [83]:
test_dataset

Unnamed: 0_level_0,Unnamed: 1_level_0,height1,ensemble_member,x,y,longitude,latitude,air_temperature_2m
airport_identifier,rounded_issued_at,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ENAL,2023-10-10 00:00:00,2.0,0,-455084.062500,-50017.90625,6.089178,62.565422,282.125488
ENAL,2023-10-10 00:00:00,2.0,1,-455084.062500,-50017.90625,6.089178,62.565422,282.072205
ENAL,2023-10-10 00:00:00,2.0,2,-455084.062500,-50017.90625,6.089178,62.565422,282.544434
ENAL,2023-10-10 00:00:00,2.0,3,-455084.062500,-50017.90625,6.089178,62.565422,281.222900
ENAL,2023-10-10 00:00:00,2.0,4,-455084.062500,-50017.90625,6.089178,62.565422,281.682007
...,...,...,...,...,...,...,...,...
ENAN,2023-10-12 13:00:00,2.0,25,44915.945312,667482.12500,16.135429,69.287653,278.767792
ENAN,2023-10-12 13:00:00,2.0,26,44915.945312,667482.12500,16.135429,69.287653,280.006653
ENAN,2023-10-12 13:00:00,2.0,27,44915.945312,667482.12500,16.135429,69.287653,279.102173
ENAN,2023-10-12 13:00:00,2.0,28,44915.945312,667482.12500,16.135429,69.287653,278.592926


In [84]:
test_merge = pd.merge(test_dataset, metar_data_pandas, left_index=True, right_index=True, how='inner')

In [85]:
test_merge

Unnamed: 0_level_0,Unnamed: 1_level_0,height1,ensemble_member,x,y,longitude,latitude,air_temperature_2m,issued_at,metar,metartype
airport_identifier,rounded_issued_at,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ENAL,2023-10-10 00:00:00,2.0,0,-455084.062500,-50017.90625,6.089178,62.565422,282.125488,2023-10-09 23:50:00,ENAL 092350Z 21015KT 9999 FEW030/// OVC039/// ...,AUTO
ENAL,2023-10-10 00:00:00,2.0,1,-455084.062500,-50017.90625,6.089178,62.565422,282.072205,2023-10-09 23:50:00,ENAL 092350Z 21015KT 9999 FEW030/// OVC039/// ...,AUTO
ENAL,2023-10-10 00:00:00,2.0,2,-455084.062500,-50017.90625,6.089178,62.565422,282.544434,2023-10-09 23:50:00,ENAL 092350Z 21015KT 9999 FEW030/// OVC039/// ...,AUTO
ENAL,2023-10-10 00:00:00,2.0,3,-455084.062500,-50017.90625,6.089178,62.565422,281.222900,2023-10-09 23:50:00,ENAL 092350Z 21015KT 9999 FEW030/// OVC039/// ...,AUTO
ENAL,2023-10-10 00:00:00,2.0,4,-455084.062500,-50017.90625,6.089178,62.565422,281.682007,2023-10-09 23:50:00,ENAL 092350Z 21015KT 9999 FEW030/// OVC039/// ...,AUTO
...,...,...,...,...,...,...,...,...,...,...,...
ENAN,2023-10-12 13:00:00,2.0,25,44915.945312,667482.12500,16.135429,69.287653,278.767792,2023-10-12 12:50:00,ENAN 121250Z 04023KT CAVOK 05/M02 Q0988=,MANUAL
ENAN,2023-10-12 13:00:00,2.0,26,44915.945312,667482.12500,16.135429,69.287653,280.006653,2023-10-12 12:50:00,ENAN 121250Z 04023KT CAVOK 05/M02 Q0988=,MANUAL
ENAN,2023-10-12 13:00:00,2.0,27,44915.945312,667482.12500,16.135429,69.287653,279.102173,2023-10-12 12:50:00,ENAN 121250Z 04023KT CAVOK 05/M02 Q0988=,MANUAL
ENAN,2023-10-12 13:00:00,2.0,28,44915.945312,667482.12500,16.135429,69.287653,278.592926,2023-10-12 12:50:00,ENAN 121250Z 04023KT CAVOK 05/M02 Q0988=,MANUAL
