In [3]:
import os
import sys
from pathlib import Path

# Locate the repository root two directory levels above the notebook's working
# directory, register the notebook folder, the repo root and its "drafts"
# folder on sys.path, and finally switch the working directory to the backend
# sources (presumably so the project imports in the next cell resolve).
cwd = Path.cwd()
repo = cwd.parent.parent
for import_root in (cwd, repo, repo / "drafts"):
    sys.path.append(str(import_root))
os.chdir(repo / "src" / "backend")

# Distance attribute creation

In [4]:
from data.evaluation.src.jupyter_utils.preamble import *
from ocel.default_ocel import *

## Import

In [None]:
# Read the order-management event log (SQLite file) into the project's OCEL wrapper.
# NOTE(review): `version_info` / `output` presumably control the printed import
# report — confirm against OCELWrapper.read_ocel2_sqlite_with_report.
ocel_path = config.DATA_DIR / "event_logs" / "order-management-v1.sqlite"
ocel = OCELWrapper.read_ocel2_sqlite_with_report(ocel_path, version_info=True, output=True)

## Explore OCEL structure

In [None]:
# Display the event table of the loaded log.
ocel.events

In [None]:
# Display the activities of the loaded log.
ocel.activities

In [None]:
# Select the "send package" entries of `objects_per_event` and pivot the
# remaining inner index level into columns for easier reading.
# NOTE(review): presumably these are per-object-type statistics on how many
# objects each "send package" event relates to — confirm against the wrapper.
ocel.objects_per_event.loc["send package",:].unstack()

- Every "send package" event is related to exactly one object of type *packages*.

# Add `distance` event attribute

## Extract package-customer relations

### via object interactions

In [None]:
# Restrict the object-to-object relations to the four object types of interest
# on both sides of each relation, then display the resulting table.
otypes = {"customers", "orders", "items", "packages"}
interactions = ocel.object_relations(
    otype1_filter=otypes,
    otype2_filter=otypes,
)
interactions

In [None]:
# Build a graph with one node per object id and one edge per interaction row,
# then split it into connected components (each component is a set of object ids).
OG = nx.from_pandas_edgelist(interactions, source="ocel:oid_1", target="ocel:oid_2")
cc = [component for component in nx.connected_components(OG)]

# Size summary of the graph, followed by the per-type object counts for comparison.
print(f"{len(OG.nodes())} nodes")
print(f"{len(OG.edges())} edges")
print(f"{len(cc)} components\n")

print(ocel.otype_counts)

In [None]:
# Number of customer objects contained in each connected component.
# The id set is deliberately NOT called `customers`: a later cell binds that
# name to a DataFrame, and re-running this cell afterwards would silently
# replace the frame with a set and break the cells that depend on it.
customer_oids = set(ocel.objects.loc[ocel.objects["ocel:type"] == "customers", "ocel:oid"])
[len(customer_oids & component) for component in cc]

In [None]:
# Component membership table: one row per (component number, object id).
# `ocel.join_otype` is expected to add the "ocel:type" column used below.
compdf = (
    pd.DataFrame(list(enumerate(cc)), columns=["comp", "ocel:oid"])
    .explode("ocel:oid")
    .reset_index(drop=True)
    .pipe(ocel.join_otype)
)


def _members_of_type(otype, oid_column):
    """Return the (object id, component) rows of `compdf` for one object type,
    with the "ocel:oid" column renamed to `oid_column`."""
    of_type = compdf["ocel:type"] == otype
    return compdf.loc[of_type, ["ocel:oid", "comp"]].rename(columns={"ocel:oid": oid_column})


customer_comp = _members_of_type("customers", "customer_oid")
package_comp = _members_of_type("packages", "package_oid")

# Pair every package with the customer(s) found in the same component ...
customer_package = pd.merge(customer_comp, package_comp, on="comp")
# ... and attach that pairing to the package relations of "send package" events.
send_package_customer = (
    ocel.filter_relations(activity="send package", otype="packages")
    .rename(columns={"ocel:oid": "package_oid"})
    .merge(customer_package, on="package_oid")
)
send_package_customer

In [70]:
# Sanity check: each "send package" event must resolve to exactly one customer,
# otherwise a single distance value per event would be ambiguous.
customers_per_event = send_package_customer.groupby("ocel:eid")["customer_oid"].nunique()
assert customers_per_event.eq(1).all()

## Assign a random distance to each customer

In [None]:
# Bounds of the synthetic customer distance (the unit is not stated in this notebook).
MIN_DISTANCE = 50
MAX_DISTANCE = 2000
# Fixed seed: without it every run draws different distances, so the exported
# log could not be reproduced with "Restart Kernel -> Run All".
DISTANCE_SEED = 42

# Work on a copy so that adding the column does not touch `ocel.objects`.
customers = ocel.objects[ocel.objects["ocel:type"] == "customers"].copy()

# One uniform draw from [MIN_DISTANCE, MAX_DISTANCE) per customer, rounded to 3 decimals.
distance_rng = np.random.default_rng(DISTANCE_SEED)
customers["distance"] = np.round(
    distance_rng.uniform(MIN_DISTANCE, MAX_DISTANCE, size=len(customers)),
    decimals=3,
)
customers

## Copy distance to "send package" event

In [None]:
# Look up each event's customer distance via the customer id.
# The cell overwrites its own input, so it must tolerate being executed twice:
# on a second run the frame already carries "ocel:oid" and "distance" from the
# previous merge, pandas would suffix the overlapping columns
# ("distance_x"/"distance_y"), and the later lookup of "distance" would fail.
# Dropping the columns left by a previous run first makes the cell idempotent;
# on a first run there is nothing to drop.
send_package_customer = (
    send_package_customer
    .drop(columns=["ocel:oid", "distance"], errors="ignore")
    .merge(
        customers[["ocel:oid", "distance"]],
        left_on="customer_oid",
        right_on="ocel:oid",
    )
)
send_package_customer

## Change ``pm4py`` OCEL object and export

In [74]:
# Re-index both lookup tables by the key they are aligned on in the next cells:
# customers by object id, "send package" rows by event id.
customers1 = customers.set_index(keys="ocel:oid")
send_package_customer1 = send_package_customer.set_index(keys="ocel:eid")

In [None]:
# Object table with a "distance" column, aligned on the object id.
# Objects without an entry in `customers1` (i.e. non-customers) get NaN.
objects1 = ocel.objects.set_index("ocel:oid").assign(distance=customers1["distance"])
objects = objects1.reset_index()
objects

In [None]:
# Event table with a "distance" column, aligned on the event id.
# Events without an entry in `send_package_customer1` get NaN.
events1 = ocel.events.set_index("ocel:eid").assign(distance=send_package_customer1["distance"])
events = events1.reset_index()
events

In [77]:
# Swap the object and event tables of the wrapped OCEL object (`ocel.ocel`,
# the object passed to the pm4py writer below) for the versions that carry the
# new "distance" column.
# NOTE(review): this mutates the loaded log in place; whether the wrapper's own
# views (e.g. `ocel.events`) pick up the change is not visible here — confirm.
ocel.ocel.objects = objects
ocel.ocel.events = events

In [84]:
# Destination of the enriched log, in the same DATA_DIR/event_logs folder as the input file.
EXPORT_PATH = config.DATA_DIR / "event_logs/order-management-v1-with-distances.sqlite"

In [80]:
# Export the modified OCEL to EXPORT_PATH as a SQLite file.
# NOTE(review): behaviour when the target file already exists is not visible
# here — confirm before re-running this cell.
pm4py.write_ocel2_sqlite(ocel.ocel, EXPORT_PATH)

## Import the new OCEL

In [None]:
# Read the exported file back in to verify that the export round-trips.
ocel1 = OCELWrapper.read_ocel2_sqlite_with_report(
    EXPORT_PATH,
    version_info=True,
    output=True,
)

In [None]:
# Display the event table of the re-imported log (to check for the new "distance" attribute).
ocel1.events

## Not implemented: Add `location` object attribute

In [None]:
# customers = ocel.objects[ocel.objects["ocel:type"] == "customers"][["ocel:oid", "ocel:type"]].copy()
# cities = pd.read_csv("D:/Dropbox/RWTH/23ws - Master thesis/OCEL analysis/orderManagementLocations.csv", sep=";")
# customers["city"] = cities["city"]
# customers

In [None]:
# from geopy.geocoders import Nominatim
# import functools


# # initialize Nominatim API
# geolocator = Nominatim(user_agent="ocean")

# @functools.lru_cache
# def search_location(place: str):
#     return geolocator.geocode(place)

In [None]:
# def get_coords(location):
#     if location is None:
#         return None, None
#     lat = location.latitude
#     long = location.longitude
#     return lat, long

# # # search locations via API
# customers["coords"] = customers["city"].apply(search_location).apply(get_coords)

# customers


In [None]:
# from cartopy import crs as ccrs, feature as cfeature
# import matplotlib.pyplot as plt

# crs = ccrs.PlateCarree()
# plt.figure(figsize=(12,12))
# ax = plt.axes(projection=ccrs.Mollweide())
# # ax.set_extent([longmin - longpad, longmax + longpad, latmin - latpad, latmax + latpad], crs=crs)

# ax.add_feature(cfeature.LAND)
# ax.add_feature(cfeature.COASTLINE, linewidth=.5)

# ax.add_feature(cfeature.BORDERS, linewidth=.5, alpha=.5)
# ax.add_feature(cfeature.STATES, alpha=.25, linewidth=.25)

# ax.add_feature(cfeature.LAKES, alpha=0.5)
# ax.add_feature(cfeature.RIVERS, linewidth=.5, alpha=.5)

# ax.scatter(x=customers["coords"].apply(lambda c: c[1]), y=customers["coords"].apply(lambda c: c[0]), s=4, transform=crs)
# for i, row in customers.iterrows():
#     label = f"{row['ocel:oid']} ({row['city']})"
#     print(label)
#     ax.annotate(label, row["coords"], xytext=(5,5), textcoords="offset pixels", transform=crs, fontsize=20, color="black")

# plt.show()