***
#### Import of required libraries

In [None]:
from traffic.core import Traffic
from tqdm.notebook import tqdm

***
#### Import data

In [None]:
# Load the trajectory collection used by the rest of the notebook.
t = Traffic.from_file(
    "/mnt/beegfs/store/MIAR/merged/osn/takeoffs28_mass_may_22.parquet"
)

# Alternative input (judging by the file name, the complete dataset); swap
# it in for the call above to use it instead.
# t = Traffic.from_file(
#     "/mnt/beegfs/store/MIAR/merged/osn/takeoffs28_mass_complete.parquet"
# )

***

In [None]:
# Reduced collection for quick experiments (presumably a random subset of
# 100 flights -- confirm against the traffic `Traffic.sample` documentation).
t_red = t.sample(100)

In [None]:
# Sliding-window dataset over the reduced sample: every run of `input_len`
# consecutive rows is paired with the `output_len` rows that directly follow.
input_len = 10
output_len = 60
inputs = []
outputs = []

# Only these columns are kept in each window.
feature_columns = ["latitude", "longitude", "altitude"]

for flight in tqdm(t_red):
    data = flight.data
    # Select the columns once per flight instead of twice per window.
    track = data[feature_columns]
    # Flights shorter than one full window give an empty range and are skipped.
    for i in range(len(track) - input_len - output_len + 1):
        split = i + input_len
        inputs.append(track.iloc[i:split])
        outputs.append(track.iloc[split : split + output_len])

In [None]:
# Inspect the first entry of `inputs` as a NumPy array.
inputs[0].to_numpy()

In [None]:
# Inspect the first entry of `outputs` as a NumPy array.
outputs[0].to_numpy()

In [None]:
# Materialise the flights into a plain list (with a progress bar) so the
# collection has a length and can be handed to a multiprocessing pool.
flightlist = list(tqdm(t))

In [None]:
from multiprocessing import Pool

input_len = 10
output_len = 60
inputs = []
outputs = []


def process_element(flight):
    """Cut one flight into sliding (input, output) window pairs.

    Window sizes are read from the module-level ``input_len`` and
    ``output_len``. Each input window holds ``input_len`` consecutive rows
    of ``flight.data``; its output window holds the ``output_len`` rows that
    follow directly.

    The windows are collected in lists local to the call. Appending to the
    module-level ``inputs``/``outputs`` lists would make every call return
    the windows of all flights previously handled by the same process.

    Args:
        flight: Object whose ``data`` attribute supports ``len()`` and
            ``.iloc`` slicing (a pandas DataFrame).

    Returns:
        A tuple ``(inputs, outputs)`` of two equally long lists of frames;
        both are empty when the flight is shorter than one full window.
    """
    data = flight.data
    # A negative count yields an empty range, i.e. no windows.
    starts = range(len(data) - input_len - output_len + 1)
    flight_inputs = [data.iloc[i : i + input_len] for i in starts]
    flight_outputs = [
        data.iloc[i + input_len : i + input_len + output_len] for i in starts
    ]
    return (flight_inputs, flight_outputs)


# Cut every flight into windows in parallel. One imap pass is enough: it
# yields results in the order of `flightlist` and lets tqdm show progress as
# they arrive. Calling pool.map as well would process every flight twice.
with Pool(processes=20) as pool:
    results = list(
        tqdm(pool.imap(process_element, flightlist), total=len(flightlist))
    )

# Second name for the same per-flight results.
result_dfs = results


# def process_list(your_list):
#     with Pool() as pool:
#         result_dfs = pool.map(process_element, your_list)
#     return result_dfs

In [None]:
def process_data(flight):
    """Trim a flight to the rows after its first runway alignment at LSZH.

    ``flight.data`` is replaced in place by the rows whose ``timestamp`` is
    later than the ``stop`` of the first item returned by
    ``flight.aligned_on_runway("LSZH")``. The trimming happens before the
    length check, so a flight that is rejected for being too short has
    still been trimmed.

    Args:
        flight: Object providing ``aligned_on_runway``, a ``data`` frame with
            a ``timestamp`` column, and ``len()``.

    Returns:
        The same flight object when ``len(flight)`` exceeds 200 after
        trimming, otherwise ``None``. ``None`` is also returned when any
        step raises an ordinary exception (for example when no alignment
        is found).
    """
    try:
        t_start = flight.aligned_on_runway("LSZH")[0].stop
        flight.data = flight.data[flight.data.timestamp > t_start]
        if len(flight) > 200:
            return flight
    except Exception:
        # Best effort: a flight that cannot be processed is dropped.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        return None
    return None

In [None]:
import pandas as pd
from multiprocessing import Pool, Manager

# Assuming 'data', 'input_len', and 'output_len' are predefined


def _set_batch_data(frame):
    """Pool initializer: bind ``frame`` to the module-level name ``data``.

    ``process_batch`` reads ``data`` from module scope; running this in
    every worker makes that name refer to the frame being processed.
    """
    global data
    data = frame


def process_batch(args):
    """Build the window pairs for one contiguous range of start positions.

    Args:
        args: Tuple ``(start, end, input_len, output_len)``; windows are
            built for every start position in ``range(start, end)``.

    Returns:
        A tuple ``(inputs, outputs)`` of two equally long lists holding
        slices of the module-level ``data`` frame.
    """
    start, end, input_len, output_len = args
    local_inputs = []
    local_outputs = []
    for i in range(start, end):
        split = i + input_len
        local_inputs.append(data.iloc[i:split])
        local_outputs.append(data.iloc[split : split + output_len])
    return local_inputs, local_outputs


def parallel_process(data, input_len, output_len, num_processes=1):
    """Cut ``data`` into sliding window pairs using a process pool.

    Args:
        data: Frame to window (supports ``len()`` and ``.iloc`` slicing).
        input_len: Number of rows in each input window.
        output_len: Number of rows in each output window.
        num_processes: Size of the worker pool.

    Returns:
        A tuple ``(all_inputs, all_outputs)`` of lists ordered by window
        start position; both are empty when ``data`` is shorter than one
        full window.
    """
    total_items = len(data) - input_len - output_len + 1
    if total_items <= 0:
        # Nothing to cut; also avoids a zero range step below.
        return [], []

    # Ceiling division: the step is at least 1 even when there are fewer
    # windows than processes, and there are never more batches than workers.
    batch_size = -(-total_items // num_processes)

    # Create arguments for each batch
    batches = [
        (start, min(start + batch_size, total_items), input_len, output_len)
        for start in range(0, total_items, batch_size)
    ]

    # Process the batches in parallel. The initializer hands each worker the
    # frame passed to this function, so the workers do not depend on whatever
    # the module-level `data` happened to be when the pool was created.
    with Pool(
        processes=num_processes,
        initializer=_set_batch_data,
        initargs=(data,),
    ) as pool:
        results = pool.map(process_batch, batches)

    # Merge results from all processes
    all_inputs = [window for batch in results for window in batch[0]]
    all_outputs = [window for batch in results for window in batch[1]]

    return all_inputs, all_outputs


# Example usage
# `data`, `input_len` and `output_len` are taken from the notebook namespace,
# i.e. from whichever cell last assigned them.
num_processes = 20  # Adjust this based on your machine's capabilities
inputs, outputs = parallel_process(data, input_len, output_len, num_processes)

In [None]:
# Rebuild the window lists from scratch over every flight in `t`, keeping all
# columns: each window of `input_len` rows is paired with the `output_len`
# rows that follow it.
input_len, output_len = 10, 60
inputs, outputs = [], []

for flight in tqdm(t):
    data = flight.data
    for i in range(len(data) - (input_len + output_len) + 1):
        cut = i + input_len
        inputs.append(data.iloc[i:cut])
        outputs.append(data.iloc[cut : cut + output_len])

In [None]:
from multiprocessing import Pool

In [None]:
def _set_slice_data(frame):
    """Pool initializer: bind ``frame`` to the module-level name ``data``.

    ``process_slice`` reads ``data`` from module scope; running this in
    every worker makes that name refer to the frame being processed.
    """
    global data
    data = frame


def process_slice(args):
    """Build the window pair that starts at one position.

    Args:
        args: Tuple ``(i, input_len, output_len)`` where ``i`` is the start
            row of the input window.

    Returns:
        A tuple ``(input_slice, output_slice)`` of slices of the
        module-level ``data`` frame.
    """
    i, input_len, output_len = args
    split = i + input_len
    input_slice = data.iloc[i:split]
    output_slice = data.iloc[split : split + output_len]
    return input_slice, output_slice


def parallel_process(data, input_len, output_len, num_processes=20):
    """Cut ``data`` into sliding window pairs, one pool task per window.

    Args:
        data: Frame to window (supports ``len()`` and ``.iloc`` slicing).
        input_len: Number of rows in each input window.
        output_len: Number of rows in each output window.
        num_processes: Size of the worker pool.

    Returns:
        A tuple ``(inputs, outputs)`` of lists ordered by window start
        position; both are empty when ``data`` is shorter than one full
        window.
    """
    args = [
        (i, input_len, output_len)
        for i in range(len(data) - input_len - output_len + 1)
    ]
    if not args:
        # zip(*[]) cannot be unpacked into two names, so stop here.
        return [], []

    # The initializer hands each worker the frame passed to this function,
    # so the workers do not depend on whatever the module-level `data`
    # happened to be when the pool was created.
    with Pool(
        processes=num_processes,
        initializer=_set_slice_data,
        initargs=(data,),
    ) as pool:
        results = pool.map(process_slice, args)

    inputs, outputs = zip(*results)  # Unpack results
    return list(inputs), list(outputs)


# Example usage
# Uses `data`, `input_len` and `output_len` as currently bound in the
# notebook namespace, with the default pool size.
inputs, outputs = parallel_process(data, input_len, output_len)

In [None]:
# Walks the whole collection, rebinding `data` on every iteration; once the
# loop ends, `flight` and `data` refer to the last flight that was iterated.
for flight in tqdm(t):
    data = flight.data

In [None]:
# Display the frame currently bound to `data`.
data

In [None]:
def generate_training_set(flight, index, input_len, output_len):
    """Lazily yield sliding (input, output) window pairs for one flight.

    Args:
        flight: Object whose ``data`` attribute supports ``len()`` and
            ``.iloc`` slicing.
        index: Not used by the function; kept so existing calls still work.
        input_len: Number of rows in each input window.
        output_len: Number of rows in each output window.

    Yields:
        Tuples ``(input_window, output_window)`` where the output window
        holds the ``output_len`` rows directly after the input window.
        Nothing is yielded when the flight is shorter than one full window.
    """
    data = flight.data
    # "+ 1" so the final window, whose output ends on the last row, is
    # included -- consistent with the other windowing loops in this file.
    for i in range(len(data) - input_len - output_len + 1):
        split = i + input_len
        yield data.iloc[i:split], data.iloc[split : split + output_len]

In [None]:
# Calling the function only creates the generator object; no window is built
# until it is iterated (e.g. with `list(...)` or a `for` loop).
generate_training_set(flight, 0, 10, 5)

In [None]:
# Start from empty window lists.
inputs = []
outputs = []

# Disabled variant: altitude-only windows with the index reset to start at 0.
# for i in range(len(data) - input_len - output_len):
#     inputs.append(
#         data.iloc[i : i + input_len][["altitude"]].reset_index(drop=True)
#     )
#     outputs.append(
#         data.iloc[i + input_len : i + input_len + output_len][
#             ["altitude"]
#         ].reset_index(drop=True)
#     )

In [None]:
# Append the window pairs of every flight in `t` to the existing `inputs` and
# `outputs` lists, using the window sizes currently set in the notebook.
for flight in tqdm(t):
    data = flight.data
    window = input_len + output_len
    for i in range(len(data) - window + 1):
        inputs.append(data.iloc[i : i + input_len])
        outputs.append(data.iloc[i + input_len : i + window])

In [None]:
# Display the whole list of input windows.
inputs

In [None]:
# Display the last output window.
outputs[-1]

In [None]:
# Display the first input window.
inputs[0]

In [None]:
# Display the second input window.
inputs[1]

In [None]:
# Display the second output window.
outputs[1]

In [None]:
# Row bounds of a single window pair starting at row `i`: the input covers
# rows [i, end_input) and the output covers rows [end_input, end_output).
i = 0
input_len, output_len = 5, 60

end_input = i + input_len
end_output = end_input + output_len

In [None]:
# Input window: rows [i, end_input) of `data`, latitude/longitude only, with
# the index renumbered from 0.
data.iloc[i:end_input][["latitude", "longitude"]].reset_index(drop=True)

In [None]:
# Matching output window: rows [end_input, end_output) of `data`,
# latitude/longitude only, with the index renumbered from 0.
data.iloc[end_input:end_output][["latitude", "longitude"]].reset_index(
    drop=True
)

In [None]:
# Iterates over every flight without doing any work in the loop body; shows
# the tqdm progress bar and leaves `flight` bound to the last flight iterated.
for flight in tqdm(t):
    pass

In [None]:
# NOTE(review): `px` is not imported in any cell visible here -- presumably
# `plotly.express`; confirm it is imported before this cell runs.

# Plot the points of a 100-flight sample on a map, coloured by altitude.
sampled_points = t.sample(100).data
fig = px.scatter_mapbox(
    sampled_points,
    lat="latitude",
    lon="longitude",
    color="altitude",
    mapbox_style="carto-positron",
)

# Fixed-size figure with every margin (left, right, bottom, top) and the
# padding set to zero.
zero_margin = dict.fromkeys(("l", "r", "b", "t", "pad"), 0)
fig.update_layout(width=1000, height=600, margin=zero_margin)
fig.show()