In [None]:
from zipfile import ZipFile
import os
import pandas as pd
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [None]:
def read_and_combine(data_folder):
    """Read every ``.csv`` file in ``data_folder`` and stack them into one DataFrame.

    Files are parsed as semicolon-separated CSV and concatenated in sorted
    file-name order, so the row order of the result is reproducible
    (``os.listdir`` returns entries in arbitrary order).

    Parameters
    ----------
    data_folder : str or path-like
        Directory holding the CSV files. Only entries directly inside the
        folder whose name ends with ``.csv`` are read.

    Returns
    -------
    pandas.DataFrame
        All rows of all CSV files. Each file keeps its own row index, so
        index labels may repeat across files.

    Raises
    ------
    ValueError
        If the folder contains no ``.csv`` files.
    """
    csv_names = sorted(name for name in os.listdir(data_folder) if name.endswith(".csv"))
    if not csv_names:
        # pd.concat([]) would raise a ValueError that does not mention the folder.
        raise ValueError(f"No .csv files found in {data_folder!r}")
    dframes = [
        pd.read_csv(os.path.join(data_folder, name), sep=";", low_memory=False)
        for name in csv_names
    ]
    return pd.concat(dframes)

In [None]:
def prepare_data(df: pd.DataFrame, time_column: str) -> pd.DataFrame:
    """Select the columns used by the analysis, drop duplicate rows and parse the timestamp.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data; must contain ``time_column`` and the fixed columns listed below.
    time_column : str
        Name of the column holding the timestamp as text. It is parsed
        day-first, with the format inferred per value (``format="mixed"``).

    Returns
    -------
    pandas.DataFrame
        A new frame with the selected columns, exact duplicate rows removed and
        ``time_column`` converted to datetimes. The input frame is not modified.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``df``.
    """
    columns = ["Melding ID", "Radiokallesignal (ERS)", time_column, "Havn (kode)", "Kvantum type (kode)", "Rundvekt"]
    deduplicated = df[columns].drop_duplicates()
    parsed_time = pd.to_datetime(deduplicated[time_column], dayfirst=True, format="mixed")
    # .assign returns a fresh frame instead of writing into one derived by
    # selection, which can trigger SettingWithCopyWarning.
    return deduplicated.assign(**{time_column: parsed_time})

In [None]:
# Load the departure and arrival messages, then order each set by its timestamp.
dep_data = prepare_data(
    read_and_combine("data_test/dep/"), "Avgangstidspunkt"
).sort_values("Avgangstidspunkt")
por_data = prepare_data(
    read_and_combine("data_test/por"), "Ankomsttidspunkt"
).sort_values("Ankomsttidspunkt")

In [None]:
# Number of departure rows recorded for each message ID, most frequent first.
dep_data["Melding ID"].value_counts()

In [None]:
# Show every departure row belonging to one message (ID 111868 is hard-coded here).
dep_data[dep_data["Melding ID"] == 111868]

In [None]:
# Distinct values of the "Radiokallesignal (ERS)" column, i.e. the vessel identifiers
# that can be used in the per-vessel filters.
dep_data["Radiokallesignal (ERS)"].unique()

In [None]:
# Collapse the departure rows of vessel "LJRD" into one record per message ID:
# "Rundvekt" is summed, every other field takes the group's first value.
# The result is ordered by departure time with a clean 0..n-1 row index.
dep = dep_data.loc[dep_data["Radiokallesignal (ERS)"] == "LJRD"]
agg_func = {
    **dict.fromkeys(
        [
            "Melding ID",
            "Radiokallesignal (ERS)",
            "Avgangstidspunkt",
            "Kvantum type (kode)",
            "Havn (kode)",
        ],
        "first",
    ),
    "Rundvekt": "sum",
}
dep_agg = (
    dep.groupby("Melding ID", as_index=False)
    .agg(agg_func)
    .sort_values("Avgangstidspunkt")
    .reset_index(drop=True)
)
dep_agg

In [None]:
# Sum "Rundvekt" per message ID and quantity type (KG / OB) for vessel "LJRD",
# pivot the quantity types into columns, then attach the message's call sign,
# arrival time and port. The result is ordered by arrival time.
por = por_data.loc[por_data["Radiokallesignal (ERS)"] == "LJRD"]
agg_func = {
    **dict.fromkeys(
        [
            "Melding ID",
            "Radiokallesignal (ERS)",
            "Ankomsttidspunkt",
            "Kvantum type (kode)",
        ],
        "first",
    ),
    "Rundvekt": "sum",
}
por_agg = por.groupby(["Melding ID", "Kvantum type (kode)"], as_index=False).agg(agg_func)
por_piv = por_agg.pivot(index="Melding ID", columns="Kvantum type (kode)", values="Rundvekt")
por_final = (
    por_piv
    .join(
        por.loc[:, ["Melding ID", "Radiokallesignal (ERS)", "Ankomsttidspunkt", "Havn (kode)"]]
        .set_index("Melding ID"),
        on="Melding ID",
    )
    # The join repeats each message once per source row; keep one copy.
    .drop_duplicates()
    .sort_values("Ankomsttidspunkt")
    .reset_index()
)
por_final

In [None]:
# Keep this
# Pair departure records with arrival records into trips.
# Both tables are transposed so that each record is a column that can be
# taken out with .pop(); i and j are the column labels of the departure and
# arrival currently being consumed.
temp_dep = dep_agg.T
temp_por = por_final.T
dep_total = len(temp_dep.columns)
por_total = len(temp_por.columns)


i, j = 0, 0

# KNOWN ISSUE (still open): Sometimes the departure for the next trip starts
# before the arrival of the previous trip.
# Example vessel ids: LIRC,

# KNOWN ISSUE (still open): Departure time is after Arrival
# Example vessels: LJRD

# FIXED: Out of index error (example vessels: LANV, LAKF).
# Running past the last departure or the last arrival used to raise a
# KeyError from .pop(); both inner loops now stop at the last record.
trips = []
while i < dep_total and j < por_total:
    start = temp_dep.pop(i)
    end = temp_por.pop(j)

    # Look at the following departures until one lies after the current arrival.
    while i < dep_total:
        if i + 1 == dep_total:
            # Column i is already consumed and nothing follows it. Advance i
            # so the outer loop terminates instead of popping column i again.
            i += 1
            break
        new_start = temp_dep.pop(i + 1)
        if new_start["Avgangstidspunkt"] > end["Ankomsttidspunkt"]:
            # This departure belongs to the next trip: put it back so the
            # outer loop picks it up as the next `start`.
            temp_dep.insert(0, new_start.name, new_start)
            i += 1
            break
        # A departure at or before the arrival replaces `start` only while
        # the current start reports a non-zero "Rundvekt".
        if start["Rundvekt"] != 0:
            start = new_start
        i += 1

    # Move to later arrivals until the OB and KG totals are equal. If the
    # arrivals run out first, the last available one closes the trip.
    while end["OB"] != end["KG"] and j + 1 < por_total:
        j += 1
        end = temp_por.pop(j)
    j += 1

    trip = [start, end]
    trips.append(trip)


# One line per trip: record labels, departure/arrival times and ports.
for start, end in trips:
    print(start.name, end.name, start["Avgangstidspunkt"], end["Ankomsttidspunkt"], start["Havn (kode)"], end["Havn (kode)"])