## Load flights from DHV XC

In [1]:
import json
import urllib
import urllib.parse

import pandas as pd
import requests

# Number of flights requested per API call.
PAGE_SIZE = 500
# Number of consecutive pages fetched per take-off site (start offsets 0, PAGE_SIZE, ...).
PAGES_PER_PLACE = 2
# Seconds to wait for the server before a request is abandoned.
REQUEST_TIMEOUT = 30
# Decoded form of the request, kept for reference:
#DHVXC_REST_DECODED_URL = 'https://de.dhv-xc.de/api/fli/flights?fkcat[]=1&fkto[]={place_id}&navpars={"start":{start},"limit":{limit},"sort":[{"field":"FlightDate","dir":-1},{"field":"BestTaskPoints","dir":-1}]}'

# Take-off site name -> id that is sent as the `fkto[]` query parameter.
places = {
    'Metzingen': 11185,
    'Estorf': 11001,
    'Leese': 10746,
    'Lüdingen': 9759,
    'Brunsberg': 9844,
    'Kella': 9521,
    'Börry': 9403,
    'Porta': 9712,
    'Königszinne': 11489,
    'Rammelsberg': 9427,
}

# Columns taken from each API response, in the order used for df_results below.
FLIGHT_COLUMNS = ['FlightDate', 'TakeoffWaypointName', 'Glider', 'FlightDuration']

results = []
for place, place_id in places.items():
    for start in range(0, PAGES_PER_PLACE * PAGE_SIZE, PAGE_SIZE):
        navpars = {
            "start": start,
            "limit": PAGE_SIZE,
            "sort": [{"field": "FlightDate", "dir": -1}, {"field": "BestTaskPoints", "dir": -1}],
        }
        # Compact JSON (double quotes, no spaces) percent-encoded as a single query value.
        encoded_navpars = urllib.parse.quote_plus(json.dumps(navpars, separators=(',', ':')))
        # NOTE(review): the "?s?" in front of fkcat differs from the decoded reference URL
        # above and looks like a typo; it is kept unchanged here - confirm against the API.
        decoded_url = f"https://de.dhv-xc.de/api/fli/flights?s?fkcat%5B%5D=1&fkto%5B%5D={place_id}&navpars={encoded_navpars}"
        r = requests.get(decoded_url, timeout=REQUEST_TIMEOUT)
        if r.status_code != 200:
            # Best effort: report the failed page instead of dropping it silently.
            print(f"Skipping {place} (start={start}): HTTP {r.status_code}")
            continue
        df = pd.DataFrame(r.json()['data'])
        if df.empty:
            # No rows on this page, so there is nothing to select and no later page to ask for.
            break
        results.extend(df[FLIGHT_COLUMNS].values)


df_results = pd.DataFrame(results, columns=['FlightDate', 'Takeoff', 'Glider', 'Duration'])
print(len(df_results))
df_results.tail()


10000


Unnamed: 0,FlightDate,Takeoff,Glider,Duration
9995,2022-08-06,Rammelsberg,U-Fly 3 XL,193
9996,2022-08-06,Rammelsberg,U-Double 38,386
9997,2022-08-06,Rammelsberg,Alpha 7 26,238
9998,2022-08-06,Rammelsberg,Ion 5 M,192
9999,2022-08-06,Rammelsberg,Alpha 7 26,191


## Load DWD measured data (for display purposes)

In [2]:
import sys
sys.path.insert(0, "../wetter")
from datetime import datetime
from dotenv import load_dotenv
from wetter import get_wind_data,get_rain_data

# Read ../.env before calling the wetter helpers (presumably they need settings from it - verify).
load_dotenv('../.env')

# Bounds passed to get_wind_data; the values look like YYYYMMDDHH stamps, same as MESS_DATUM.
timeframe_start, timeframe_end = 2023010100, 2024123123

df_wind = get_wind_data(timeframe_start, timeframe_end, 13)  # @ 13:00
# The first eight characters of MESS_DATUM are the calendar day; store it as 'YYYY-MM-DD'
# so it can be matched against the FlightDate strings of the flight data.
df_wind['Date'] = pd.to_datetime(
    df_wind['MESS_DATUM'].astype(str).str[:8], format='%Y%m%d'
).dt.strftime('%Y-%m-%d')
df_wind = df_wind.set_index('Date')

df_wind.tail()

Unnamed: 0_level_0,MESS_DATUM,strength,direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-09-27,2024092713,8.9,220
2024-09-28,2024092813,7.4,280
2024-09-29,2024092913,1.7,170
2024-09-30,2024093013,6.8,120
2024-10-01,2024100113,3.4,190


## Organize data

In [3]:
from datetime import datetime

# The 16 compass points in clockwise order starting at north (German notation: O = Ost).
_COMPASS_POINTS = [
    'N', 'NNO', 'NO', 'ONO', 'O', 'OSO', 'SO', 'SSO',
    'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW',
]

# Wind direction in degrees (10-degree steps from 0 to 360) -> compass point.
# Each compass point covers a 22.5-degree sector centred on its heading; 350 and 360 wrap to 'N'.
wind_dir = {
    degrees: _COMPASS_POINTS[round(degrees / 22.5) % 16]
    for degrees in range(0, 361, 10)
}


# Keep flights strictly between the two dates (ISO date strings compare chronologically).
# .copy() makes the column assignment below operate on an independent frame rather than
# on a slice of the downloaded data (avoids SettingWithCopyWarning).
in_window = (df_results['FlightDate'] > '2023-01-01') & (df_results['FlightDate'] < '2024-10-01')
df_results = df_results[in_window].copy()

# Site name = text before the first '-' of the take-off waypoint name, whitespace trimmed.
df_results['Takeoff'] = df_results['Takeoff'].str.split('-').str[0].str.strip()

# Flights per site and day.
df_gr = (
    df_results[['FlightDate', 'Takeoff', 'Duration']]
    .groupby(by=['Takeoff', 'FlightDate'])
    .aggregate(count=('Duration', 'count'))
    .reset_index()
)

# One row per day, one column per site; days without flights at a site count as 0.
df_pvt = pd.pivot_table(df_gr, index='FlightDate', columns='Takeoff', values='count', aggfunc='sum', fill_value=0)
# reindex fixes the column order and adds an all-zero column for a site that has no flights
# in the window, where a plain column selection would raise KeyError.
df_pvt = df_pvt.reindex(
    columns=['Metzingen', 'Lüdingen', 'Leese', 'Estorf', 'Brunsberg', 'Kella Berg', 'Börry', 'Porta', 'Königszinne', 'Rammelsberg'],
    fill_value=0,
)

df_pvt['Year'] = df_pvt.index.str[:4]
# day_name() returns English weekday names, which the weekend filter below relies on.
df_pvt['Weekday'] = pd.to_datetime(df_pvt.index, format='%Y-%m-%d').day_name()

# Attach the wind measurement of the same day; days without a measurement stay empty (NaN),
# and so does a direction value that has no entry in wind_dir.
df_pvt['HAJ Wind dir'] = df_wind['direction'].map(wind_dir).reindex(df_pvt.index)
df_pvt['HAJ Wind str'] = df_wind['strength'].reindex(df_pvt.index) * 3.6


# Weekend rows only. The name shadows the builtin `filter`, but the model-fitting cell
# refers to it, so it is kept.
filter = df_pvt['Weekday'].isin(['Saturday', 'Sunday'])
df_pvt[(df_pvt['Metzingen'] >= 0) & (filter)]

Takeoff,Metzingen,Lüdingen,Leese,Estorf,Brunsberg,Kella Berg,Börry,Porta,Königszinne,Rammelsberg,Year,Weekday,HAJ Wind dir,HAJ Wind str
FlightDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-01-07,0,0,0,0,0,0,13,0,0,0,2023,Saturday,SSW,25.20
2023-01-08,0,0,0,0,0,0,8,3,0,0,2023,Sunday,SSW,15.84
2023-01-28,0,0,0,0,0,0,0,0,0,2,2023,Saturday,SSW,8.64
2023-02-04,0,0,0,8,0,1,0,0,0,0,2023,Saturday,OSO,8.28
2023-02-19,0,0,0,0,0,0,0,0,0,2,2023,Sunday,WNW,13.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-08,0,0,0,0,0,0,0,0,0,11,2024,Sunday,W,14.76
2024-09-15,0,0,0,0,0,0,0,0,0,6,2024,Sunday,NNW,16.20
2024-09-21,0,21,0,0,12,6,0,0,0,0,2024,Saturday,O,19.80
2024-09-22,16,16,12,0,0,0,1,0,0,13,2024,Sunday,SO,7.20


# Fit model

* Define the regressor (random forest).
* Create the training (2023) and test (2024) data sets, both restricted to weekend days.

In [4]:
from sklearn import ensemble

# Fixed random_state so that repeated runs build the same forest.
rf = ensemble.RandomForestRegressor(random_state=0, n_estimators=200)

# Weekend days only (`filter`): 2023 is used for training, 2024 for testing.
# Deep copies so that adding the 'predict' column below does not touch df_pvt.
df_training = df_pvt[(filter) & (df_pvt['Year'] == '2023')].copy(deep=True)
df_test = df_pvt[(filter) & (df_pvt['Year'] == '2024')].copy(deep=True)

# Target: flight count at Metzingen. Features: flight counts at the other sites on the same day.
yattr, xattr = 'Metzingen', ['Lüdingen', 'Leese', 'Estorf', 'Brunsberg', 'Kella Berg', 'Börry', 'Porta', 'Königszinne', 'Rammelsberg']

rf.fit(df_training[xattr].to_numpy(), df_training[yattr].to_numpy())

# Predict all rows of a set in one call instead of one predict() call per row.
df_training['predict'] = rf.predict(df_training[xattr].to_numpy())
df_test['predict'] = rf.predict(df_test[xattr].to_numpy())


## Evaluate

In [12]:
# A day counts as "predicted flyable" when the predicted flight count reaches this value.
threshold = 5

# Split the test days by predicted (>= threshold) versus actual (> 0 flights at the target site).
predicted_flyable = df_test['predict'] >= threshold
actually_flown = df_test[yattr] > 0

# True positives (author's note from an earlier run: 7).
df_tp = df_test[predicted_flyable & actually_flown]
# True negatives (author's note: great days at BB, Kella, Rammi).
df_tn = df_test[(df_test['predict'] < threshold) & (df_test[yattr] == 0)]
# False negatives (author's note from an earlier run: 6 - strong wind, winch seminar,
# women pilots' meeting, Christian D).
df_fn = df_test[(df_test['predict'] < threshold) & actually_flown]
# False positives = missed days (author's note from an earlier run: 8).
df_fp = df_test[predicted_flyable & (df_test[yattr] == 0)]

tp, fp, fn = len(df_tp), len(df_fp), len(df_fn)
# Report NaN instead of raising ZeroDivisionError when a denominator is zero.
precision = tp / (tp + fp) if (tp + fp) else float('nan')
recall = tp / (tp + fn) if (tp + fn) else float('nan')
print(f'Scores: TP {tp}, FP {fp}, FN {fn}')
print(f'Precision: {precision:.3f}, recall: {recall:.3f}')

wind_and_prediction = ['HAJ Wind dir', 'HAJ Wind str', 'predict']
print("\nTrue positives\n")
print(df_tp[[yattr] + xattr + wind_and_prediction].to_markdown())
print("\n\nFalse positives\n")
# The target column is omitted here: it is 0 on every false-positive day.
print(df_fp[xattr + wind_and_prediction].to_markdown())
print("\n\nFalse negatives\n")
print(df_fn[[yattr] + xattr + wind_and_prediction].to_markdown())


Scores: TP 7, FP 8, FN 6
Precision: 0.467, recall: 0.538

True positives

| FlightDate   |   Metzingen |   Lüdingen |   Leese |   Estorf |   Brunsberg |   Kella Berg |   Börry |   Porta |   Königszinne |   Rammelsberg | HAJ Wind dir   |   HAJ Wind str |   predict |
|:-------------|------------:|-----------:|--------:|---------:|------------:|-------------:|--------:|--------:|--------------:|--------------:|:---------------|---------------:|----------:|
| 2024-05-26   |           7 |          0 |       0 |        8 |           0 |            0 |      13 |      17 |             0 |             0 | S              |          14.4  |     9.22  |
| 2024-06-29   |           5 |         15 |      15 |        0 |           0 |            5 |       0 |       0 |             0 |             0 | O              |          11.52 |    14.1   |
| 2024-07-14   |          18 |          0 |       0 |        0 |           0 |            0 |       2 |      12 |             2 |             0 | SSW         