In [26]:
import numpy as np
import pandas as pd
from dotenv import load_dotenv

from xcontest.xcontest import login, flight_days
from wetter.wetter import get_combined_data

load_dotenv()

timeframe_start,timeframe_end = 2019010100,2023123123

df = get_combined_data(timeframe_start,timeframe_end, 'Sun',13, 13)

print(df.head())

driver = login()

flight_day_list= flight_days(driver, 'Metzingen')

          strength wind_alignment precip
meas_hour       13             13     13
meas_day                                
20190106       4.6       0.969846    0.0
20190113       8.4       0.750000    0.8
20190120       3.0       0.586824    0.0
20190127       7.2       0.030154    0.0
20190203       4.8       0.883022    0.0
['21.04.2024', '31.03.2024', '10.02.2024', '09.09.2023', '11.08.2023', '05.08.2023', '07.07.2023', '18.06.2023', '10.06.2023', '04.06.2023', '03.06.2023', '28.05.2023', '27.05.2023', '18.05.2023', '30.04.2023', '23.04.2023', '07.04.2023', '18.03.2023', '30.10.2022', '09.10.2022', '30.09.2022', '21.08.2022', '02.07.2022', '04.06.2022', '03.06.2022', '18.04.2022', '06.03.2022', '15.01.2022', '24.10.2021', '17.10.2021', '10.10.2021', '02.10.2021', '21.08.2021', '12.08.2021', '25.07.2021', '10.07.2021', '04.07.2021', '27.06.2021', '26.06.2021', '01.06.2021', '30.05.2021', '08.05.2021', '01.05.2021', '17.10.2020', '20.09.2020', '17.09.2020', '19.08.2020', '09.08.2020',

In [38]:
from datetime import datetime
from sklearn.neighbors import KNeighborsRegressor

flight_day_as_mess_date = [int(datetime.strftime(datetime.strptime(d,'%d.%m.%Y'), '%Y%m%d')) for d in flight_day_list]

df['is_flight_day'] = df.apply(lambda row: 1 if int(row.name) in flight_day_as_mess_date else 0, axis=1)
#print(df.head(10))

X, y = df[['strength','wind_alignment','precip']].to_numpy(), df['is_flight_day'].to_numpy() # features and labels
#print(X.shape, y.shape)

# perceptron ?  https://en.wikipedia.org/wiki/Perceptron - no separating hyperplane
# kNN ?
neigh = KNeighborsRegressor(n_neighbors=5, weights= 'distance')
neigh.fit(X, y)

#ytilde = pd.Series(neigh.predict(X),name='knn')
df['knn'] =  df.apply(lambda row:  neigh.predict([[row['strength',13],row['wind_alignment',13],row['precip',13]]])[0], axis=1)
#print(df.head(50))

df_actual = df[(df['is_flight_day']==1)].copy()
df_missed = df[(df['knn']>= 0.4) & (df['is_flight_day']==0)].copy()
print(df_missed.head())

          strength wind_alignment precip is_flight_day  knn
meas_hour       13             13     13                   
meas_day                                                   
20191229       3.6       0.413176    0.0             0  0.5
20211128       2.4       0.586824    0.0             0  0.5
20230205       2.5       0.586824    0.0             0  0.5


In [13]:
from datetime import datetime
from sklearn.model_selection import train_test_split
from scipy.special import softmax

flight_day_as_mess_date = [int(datetime.strftime(datetime.strptime(d,'%d.%m.%Y'), '%Y%m%d13')) for d in flight_day_list]

df_wind['is_flight_day'] = df_wind.apply(lambda row: 1 if row['MESS_DATUM'] in flight_day_as_mess_date else 0, axis=1)
# 
df_wind_train, df_wind_test = train_test_split(df_wind, test_size=0.2 )
X, y = df_wind_train[['strength','wind_alignment','precip']].to_numpy().T, df_wind_train['is_flight_day'].to_numpy() # features and labels
Y = np.asarray([[1,0] if f else [0,1] for f in y]).T
class_weights =np.diag( [1.0-s/len(y) for s in Y.sum(axis=1)])

print(class_weights)

nx, m = np.shape(X)
nh = 100 # number of hidden units
ny = 2 # number of classes
assert(np.shape(Y)==(ny,m))
print(nx,m,nh,ny)

alpha = 0.1

# define activation and derivative
# LeRU
f = np.vectorize(lambda x: x if x > 0 else 0)
fprime = np.vectorize(lambda x: 1 if x > 0 else 0)

# init 
Xtilde = np.vstack((np.asarray([1 for _ in range(m)]) , X ))
#print(np.shape(Xtilde))
W1, W2 = 0.001* np.random.random_sample(size=((nh,nx+1))),  0.001*np.random.random_sample(size=((ny,nh+1)))

# loop
for _ in range(10):
    alpha = 0.9*alpha
    # FW 
    Z1 = W1.dot(Xtilde)
    A1 = f(np.array(Z1))
    #print(np.shape(A1))
    A1tilde =  np.vstack((np.asarray([1 for _ in range(m)]) , A1 ))
    Z2 = W2.dot(A1tilde)
    #print(np.shape(Z2))
    A2 = softmax(Z2)
    #print(np.shape(A2))
    loss = - np.sum(class_weights.dot(Y) * np.log(A2))/m/ny
    print(loss)

    # backpropagation
    dZ2 =(np.eye(ny)-class_weights).dot(A2) - class_weights.dot(Y) + A2*Y # imbalanced classes
    dW2 = (dZ2.dot(A1tilde.T))/m 
    #print(np.shape(dW2))
    dZ1tilde = W2.T.dot( dZ2) * fprime(np.vstack((np.asarray([1 for _ in range(m)]) , Z1 ))) # last term is Z1-prime
    dW1 = dZ1tilde[1:,:].dot(Xtilde.T)/m

    # step
    W1 += -alpha*dW1
    W2 += -alpha*dW2

# test
Xtest, ytest = df_wind_test[['strength','wind_alignment','precip']].to_numpy().T, df_wind_test['is_flight_day'].to_numpy() 
Xtilde = np.vstack((np.asarray([1 for _ in range(len(ytest))]) , Xtest ))
Z1 = W1.dot(Xtilde)
A1 = f(np.array(Z1))
A1tilde =  np.vstack((np.asarray([1 for _ in range(len(ytest))]) , A1 ))
Z2 = W2.dot(A1tilde)
A2 = softmax(Z2)

for r in (np.vstack((ytest , A2)).T):
    if r[0]:
        print(r)


[[0.95454545 0.        ]
 [0.         0.04545455]]
3 1452 100 2
0.34597330314034813
0.34597334553932013
0.34597338522077287
0.34597342223309985
0.3459734566462403
0.3459734885483026
0.345973518041899
0.34597354524052304
0.34597357026517356
0.3459735932413418
[1.         0.00137775 0.0013768 ]
[1.         0.00137778 0.00137683]
[1.         0.0013778  0.00137685]
[1.         0.00137787 0.00137693]
[1.         0.00137777 0.00137681]
[1.         0.00137775 0.00137679]
[1.         0.00137777 0.00137682]
[1.         0.00137776 0.00137681]
[1.         0.00137782 0.00137687]
[1.         0.00137787 0.00137693]
[1.         0.00137776 0.00137681]


In [None]:
)