In [None]:
import pandas as pd
from math import sqrt
from datetime import datetime

In [None]:
# load the validated observations (semicolon-separated CSV)
file = "../DATA/03_valid_observations.csv"
df = pd.read_csv(file, sep=";")

In [None]:
# one group per distinct timestamp value
groups = df.groupby(by='timestamp')

In [None]:
# function which calculates the n nearest neighbours of a given station
def calc_nearest_neighbours(obs, station, number=3):
    """Return the observations in ``obs`` that lie closest to ``station``.

    Parameters
    ----------
    obs : pandas.DataFrame
        Observations providing the columns ``name``, ``east``, ``north``,
        ``up`` and ``value``.
    station : sequence
        ``[name, east, north, up]`` of the reference station. Every row of
        ``obs`` carrying this name is excluded from the search.
    number : int or None, default 3
        Maximum number of neighbours to return. ``None`` returns all of them.

    Returns
    -------
    list of list
        ``[distance, value]`` pairs ordered by ascending 3D Euclidean
        distance. The list is shorter than ``number`` when ``obs`` holds
        fewer other stations, and empty when ``number`` is not positive.
    """
    if number is not None and number <= 0:
        # A negative slice bound would silently drop neighbours from the far
        # end of the sorted list instead of limiting the count.
        return []

    others = obs[obs['name'] != station[0]]
    east, north, up = station[1], station[2], station[3]

    # Plain column lists avoid building one Series per row (as iterrows does)
    # while keeping the arithmetic identical to a row-by-row computation.
    coordinates_and_values = zip(
        others['east'].tolist(),
        others['north'].tolist(),
        others['up'].tolist(),
        others['value'].tolist(),
    )
    result = [
        [sqrt((east - e) * (east - e) + (north - n) * (north - n) + (up - u) * (up - u)), value]
        for e, n, u, value in coordinates_and_values
    ]

    # list.sort is stable: equidistant neighbours keep their order from obs.
    result.sort(key=lambda pair: pair[0])
    return result[:number]

In [None]:
# function to flatten the result data structure (the n nearest neighbours found per station)
def prepare_list(result):
    """Flatten every entry of ``result`` by one nesting level.

    Each entry is a sequence of sequences (e.g. ``[dist, value]`` pairs); the
    inner sequences are concatenated in order into a single flat list. One
    flat list is returned per entry, in the same order as ``result``.
    """
    flattened = []
    for neighbours in result:
        row = []
        for pair in neighbours:
            row.extend(pair)
        flattened.append(row)
    return flattened

In [None]:
# function to create a dynamic dataframe depending on the number of considered neighbours
def create_final_dataframe(result):
    """Build a DataFrame with ``dist_i`` / ``value_i`` columns from flat rows.

    Parameters
    ----------
    result : list of list
        Flat rows of alternating distance and value entries
        (``[dist_1, value_1, dist_2, value_2, ...]``). Rows may differ in
        length.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``result``. The number of column pairs follows
        the widest row; shorter rows are padded with missing values. An empty
        ``result`` yields an empty DataFrame.
    """
    # Size the frame from the widest row rather than the first one: a short
    # first row followed by a longer one would otherwise make pandas raise.
    width = max((len(row) for row in result), default=0)

    cols = []
    for i in range(width // 2):
        cols.append('dist_{}'.format(i+1))
        cols.append('value_{}'.format(i+1))

    # Pad explicitly so every row matches the column count.
    padded = [list(row) + [None] * (width - len(row)) for row in result]
    cols_df = pd.DataFrame(columns=cols, data=padded)
    return cols_df

In [None]:
# compute the 10 nearest stations for every observation, with progress output
# Neighbour lists are keyed by the row label of `df`: groupby iterates the
# groups in sorted timestamp order, which need not be the row order of `df`,
# while the later column-wise concat matches rows by label.
neighbours_by_row = {}
l = len(groups)
cc = 1
pc = 1
for name, group in groups:
    # compute progress (prints once the percentage passes the next whole step)
    perc = (cc/l)*100
    if perc > pc:
        print('{0}: {1:.1f}%'.format(datetime.now(), perc))
        pc+=1
    cc+=1
    # compute n nearest neighbours among the stations sharing this timestamp
    for idx, cr in group.iterrows():
        station = [cr['name'], cr['east'], cr['north'], cr['up']]
        neighbours_by_row[idx] = calc_nearest_neighbours(group, station, number=10)

# Emit one entry per row of `df`, in `df` order. Rows that ended up in no
# group (e.g. a missing timestamp, which groupby skips) get an empty list so
# the following rows stay aligned.
result = [neighbours_by_row.get(idx, []) for idx in df.index]

In [None]:
# create dataframe for results of calculation
# The flattened rows get their own name: rebinding `result` here would make
# this cell fail when run a second time, because prepare_list would then be
# handed rows that are already flat.
result_flat = prepare_list(result)
neighbours = create_final_dataframe(result_flat)

In [None]:
# append the neighbour columns to the original observations (rows matched by index label)
res = pd.concat(objs=[df, neighbours], axis='columns', sort=True)

In [None]:
# write the combined table to a semicolon-separated CSV, without the row index
file = "../DATA/04_station_nearest_stations.csv"
columns = list(res.columns)
res.to_csv(path_or_buf=file, sep=";", columns=columns, index=False)