In [None]:
from zipfile import ZipFile
import os
import pandas as pd
# Let pandas render up to 100 columns and 100 rows before truncating a frame.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)

In [None]:
def read_and_combine(data_folder):
    """Read every ``.csv`` file in a folder and stack them into one DataFrame.

    Files are parsed with ``;`` as the field separator and concatenated in
    sorted file-name order, so the row order of the result does not depend on
    the arbitrary order in which the operating system lists the directory.

    Parameters
    ----------
    data_folder : str or path-like
        Directory that is searched (non-recursively) for files whose name
        ends with ``.csv``.

    Returns
    -------
    pandas.DataFrame
        The rows of all files; each file's rows keep their own original index.

    Raises
    ------
    ValueError
        If the folder contains no ``.csv`` file.
    """
    csv_names = sorted(name for name in os.listdir(data_folder) if name.endswith(".csv"))
    if not csv_names:
        # pd.concat([]) would fail with an opaque "No objects to concatenate";
        # raise the same exception type with a message that names the folder.
        raise ValueError(f"No .csv files found in {data_folder!r}")
    dframes = [
        pd.read_csv(os.path.join(data_folder, name), sep=";", low_memory=False)
        for name in csv_names
    ]
    return pd.concat(dframes)

In [None]:
def prepare_data(df, time_column: str):
    """Keep the columns needed for trip matching and parse the time column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns "Melding ID",
        "Radiokallesignal (ERS)", "Havn (kode)", "Kvantum type (kode)",
        "Rundvekt" and ``time_column``.
    time_column : str
        Name of the column holding the timestamp text; it is parsed
        day-first, with the format inferred per value.

    Returns
    -------
    pandas.DataFrame
        A new frame with only the six columns above, exact duplicate rows
        removed, and ``time_column`` converted to datetime. ``df`` itself is
        not modified.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``df``.
    """
    columns = ["Melding ID", "Radiokallesignal (ERS)", time_column, "Havn (kode)", "Kvantum type (kode)", "Rundvekt"]
    deduplicated = df[columns].drop_duplicates()
    parsed_times = pd.to_datetime(deduplicated[time_column], dayfirst=True, format="mixed")
    # assign() returns a fresh frame instead of writing a column into the
    # result of drop_duplicates(), which pandas may flag as a copy and answer
    # with a SettingWithCopyWarning.
    return deduplicated.assign(**{time_column: parsed_times})

In [None]:
# Load the "dep" and "por" exports, reduce them to the columns of interest and
# order each frame chronologically by its own time column.
dep_data = (
    prepare_data(read_and_combine("data_test/dep/"), "Avgangstidspunkt")
    .sort_values("Avgangstidspunkt")
)
por_data = (
    prepare_data(read_and_combine("data_test/por"), "Ankomsttidspunkt")
    .sort_values("Ankomsttidspunkt")
)

In [None]:
# Number of rows recorded per message ID, most frequent first.
dep_data.loc[:, "Melding ID"].value_counts()

In [None]:
# Show every row that belongs to one example message ID.
dep_data.loc[dep_data["Melding ID"].eq(111868)]

In [None]:
# Distinct values of the radio call sign column, in order of first appearance.
pd.unique(dep_data["Radiokallesignal (ERS)"])

In [None]:
# Collapse the rows of call sign "LLAS" to one row per message ID: the
# descriptive columns take their first value and Rundvekt is summed over all
# rows of the message.
dep = dep_data.loc[dep_data["Radiokallesignal (ERS)"] == "LLAS"]
agg_func = {
    **dict.fromkeys(
        [
            "Melding ID",
            "Radiokallesignal (ERS)",
            "Avgangstidspunkt",
            "Kvantum type (kode)",
            "Havn (kode)",
        ],
        "first",
    ),
    "Rundvekt": "sum",
}
dep_agg = (
    dep.groupby("Melding ID", as_index=False)
    .agg(agg_func)
    .sort_values("Avgangstidspunkt")
)
dep_agg

In [None]:
# Sum Rundvekt by message ID and quantity type (KG, OB), then pivot the
# quantity types into columns so that each message ID has one row of totals.
por = por_data.loc[por_data["Radiokallesignal (ERS)"] == "LLAS"]
agg_func = {
    **dict.fromkeys(
        [
            "Melding ID",
            "Radiokallesignal (ERS)",
            "Ankomsttidspunkt",
            "Kvantum type (kode)",
        ],
        "first",
    ),
    "Rundvekt": "sum",
}
por_agg = por.groupby(["Melding ID", "Kvantum type (kode)"], as_index=False).agg(agg_func)
por_piv = por_agg.pivot(index="Melding ID", columns="Kvantum type (kode)", values="Rundvekt")

# Re-attach call sign, arrival time and port. `por` holds several rows per
# message, so the join repeats each pivot row; drop_duplicates() collapses the
# repeats again.
# NOTE(review): drop_duplicates() compares column values only and ignores the
# "Melding ID" index, so two messages with identical values would be merged.
por_meta = por[["Melding ID", "Radiokallesignal (ERS)", "Ankomsttidspunkt", "Havn (kode)"]].set_index("Melding ID")
por_final = (
    por_piv.join(por_meta, on="Melding ID")
    .drop_duplicates()
    .sort_values("Ankomsttidspunkt")
    .reset_index()
)
por_final

In [None]:
## Fish trip algorithm
# Pseudo-code sketch of how departures (index i, from dep) and arrivals
# (index j, from por) are paired into trips. It is kept as a bare string
# literal, so none of it runs as code; the Python loop that implements the
# idea is in the following cell.

"""
trips = []

for each i,j in dep, por:
    if i.time < j.time:
        start = i
        if j.KG == j.OB:
            trips.append(start, j)
        else:
            j++
    else:
        j++
"""


In [None]:
# Pair each departure with the arrival that ends the trip.
#
# Inputs (both must already be sorted by time, ascending):
#   dep_agg   - one row per departure message, time in "Avgangstidspunkt"
#   por_final - one row per arrival message, time in "Ankomsttidspunkt",
#               with the summed quantities in the "KG" and "OB" columns
# Output:
#   trips     - list of (departure_row, arrival_row) tuples
#
# `i` walks the departures and `j` the arrivals. Bounding both indices keeps
# .iloc from raising IndexError when either frame is exhausted first.
trips = []
i, j = 0, 0
while i < len(dep_agg) and j < len(por_final):
    departure = dep_agg.iloc[i]
    arrival = por_final.iloc[j]

    # A departure that falls inside the most recently recorded trip is
    # already covered by that trip, so move on to the next departure.
    if trips:
        last_departure, last_arrival = trips[-1]
        if (last_departure["Avgangstidspunkt"] <= departure["Avgangstidspunkt"]
                < last_arrival["Ankomsttidspunkt"]):
            i += 1
            continue

    # The arrival closes the trip only when it happens after the departure and
    # its KG quantity equals its OB quantity
    # (presumably: everything on board is landed - confirm).
    if (departure["Avgangstidspunkt"] < arrival["Ankomsttidspunkt"]
            and arrival["KG"] == arrival["OB"]):
        trips.append((departure, arrival))

    # Always advance to the next arrival: this one either closed the trip, was
    # a partial landing, or lies before the current departure. In that last
    # case advancing `i` instead can never help, because every later departure
    # is later still, and the index would run off the end of dep_agg.
    j += 1

# Loop names differ from the `dep` / `por` frames of earlier cells so that
# printing the result does not overwrite them.
for trip_start, trip_end in trips:
    print(trip_start["Avgangstidspunkt"], trip_end["Ankomsttidspunkt"])