# imports

In [1]:
from core_config_reader_lib import spark_conf

# from datetime import datetime
from typing import Optional

import pandas as pd
from dateutil.relativedelta import relativedelta
from sklearn.model_selection import train_test_split
from pyspark.sql import functions as f, SQLContext, DataFrame
from pyspark.sql.types import *

In [2]:
import datetime as dt
import json

In [3]:
pd.set_option('display.max_rows', 100)

In [4]:
TS = "ts"

In [5]:
def _save_wanted_time(df: pd.DataFrame, latest_date, months, is_include_last_date=False) -> pd.DataFrame:
    """
    save 2 years in the set
    :return:
    """
    delta = latest_date - relativedelta(months=months)
    if is_include_last_date:
        df = df[(df[TS] >= delta) & (df[TS] <= latest_date)]
    else:
        df = df[(df[TS] >= delta) & (df[TS] < latest_date)]
    return df


def _custom_train_test_split(df: pd.DataFrame, latest_date) -> (pd.DataFrame, pd.DataFrame):
    """
    cuts to ratio of train and set according to the time
    :return:
    """
    cutoff = latest_date - relativedelta(months=3)
    test_df = df[df[TS] >= cutoff]
    train_df = df[df[TS] < cutoff]
    return train_df, test_df


def clean_and_split_df(df: pd.DataFrame) -> (pd.DataFrame, pd.DataFrame):
    df = df[df['next_pc'].notna()]
    df[TS] = pd.to_datetime(df[TS], format='%Y-%m-%d %H:%M:%S')
    # df[TS] = df[TS].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S'))
    latest_date = df[TS].max()
    df = _save_wanted_time(df, latest_date, 24, is_include_last_date=True)
    # train_df, test_df = _custom_train_test_split(df, latest_date)
    df = df.sort_values(by=["ts"], ascending=True)
#     train_df, test_df = train_test_split(df, test_size=0.2, shuffle=False)
#     return train_df, test_df
    return df


def get_last_year_resolved_to_pc(vessel_df: pd.DataFrame, timestamp) -> Optional[list]:
    last_year_df = _save_wanted_time(vessel_df, timestamp, 12)
    if last_year_df.empty or abs(
            (timestamp - last_year_df[TS].max()).days) < 90:  # if there is no delta of days or there is less than three months of data
        return None
    all_history = last_year_df.apply(
        lambda x: [x["destination"], x["resolved_dest"], x["next_pc"]], axis=1).tolist()
    return all_history

In [6]:
all_points_df = pd.read_csv("enriched_destinations.csv")

In [7]:
print(f"Cleaning and splitting df to train and test sets")
all_points_df = clean_and_split_df(all_points_df)
print(f"In train there are {all_points_df.shape[0]} rows")
# print(f"In test there are {test_points_df.shape[0]} rows")

Cleaning and splitting df to train and test sets


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In train there are 1178830 rows


# create data

## prepare dataset 

In [8]:
vessels_df = all_points_df.groupby("vessel_id")

In [9]:
final = []

for vessel, vessel_df in all_points_df.groupby("vessel_id"):
    sorted_df = vessel_df.sort_values("ts")
    tupled = vessel_df[["ts", "destination", "resolved_dest", "next_pc"]].values.tolist()
    final.append((vessel, tupled))

In [10]:
print(f"There are {len(final)} vessels after group by")

There are 5445 vessels after group by


### Cut each history per time (1 year before ts)

In [11]:
from tqdm import tqdm
import datetime as dt

def find_latest_point(min_ts, tuples):
    for ix, point in enumerate(tuples):
        if (point[0] - dt.timedelta(365 + 1) > min_ts):
            return ix
    return None
        

per_vessel = []

for i in tqdm(range(0, len(final)), desc ="per_vessel"):
    tuples = final[i][1]
    min_ts = tuples[0][0]
    vessel = final[i][0]
    wanted = []
    last_ts_ix = 0
    for j in range(1, len(tuples)):
        ts = tuples[j][0]
        if (ts > min_ts + dt.timedelta(365 + 1)):
            wanted.append(tuples[0:j])
            continue
    per_vessel.append([vessel, wanted])

per_vessel: 100%|██████████| 5445/5445 [00:07<00:00, 736.85it/s] 


In [None]:
def find_first(tuples, last_ts, ix):
    if ix >= len(tuples):
        return len(tuples)
    first_ts = tuples[ix][0]
    if (last_ts - first_ts).days < 365 + 1:
        return ix
    return find_first(tuples, last_ts, ix+1)

clean_per_vessel = []
for vessel, points_list in tqdm(per_vessel):
    blah = []
    for tuples in points_list:
        last_ts = tuples[-1][0]
        first_ts = tuples[0][0]
        first_ix = find_first(tuples, last_ts, 0)
        blah.append(tuples[first_ix:])
    clean_per_vessel.append([vessel, blah])

  6%|▌         | 309/5445 [00:14<03:58, 21.53it/s]

## start rule-based

### Get only visit history that matches reported destination

In [14]:
personilized_prediction = []

for vessel, points_list in tqdm(clean_per_vessel):
    for tuples in points_list:
        last_point = tuples[-1]
        reported = last_point[1]
        suggested_dest = last_point[2]
        real_dest = last_point[3]
        match = True if real_dest == suggested_dest else False
        timestamp = last_point[0]
        found_destinations = []
        for point in tuples[:-1]:
            if point[1] == reported:  # TODO - this like is 1:1 match!!!
                found_destinations.append(
                    {"baseline": point[2], "actual_pc": point[3]})
        personilized_prediction.append({"vessel": vessel, "reported": reported,
                                       "suggested_dest": suggested_dest, "real_dest": real_dest, "match": match,
                                        "ts": timestamp, "found_destinations": found_destinations})

100%|██████████| 5445/5445 [00:13<00:00, 412.44it/s]


In [15]:
print(f"There are {len(personilized_prediction)} individual, per vessel, timestamp and reported destination.")

There are 547743 individual, per vessel, timestamp and reported destination.


### create visit history count (actual logic application)

In [16]:
def actual_logic_func(suggested_dest, found_destinations):
    if not found_destinations:
        return None
    
    if is_nan(suggested_dest):
        return None
    
    suggested = {}
    for baseline_vs_real in found_destinations:
        if suggested_dest != baseline_vs_real["actual_pc"]:
            if baseline_vs_real["actual_pc"] in suggested:
                suggested[baseline_vs_real["actual_pc"]] += 1
            else:
                suggested[baseline_vs_real["actual_pc"]] = 1
        elif suggested_dest == baseline_vs_real["actual_pc"]:
            if "baseline" in suggested:
                suggested["baseline"] += 1
            else:
                suggested["baseline"] = 1
    
    return suggested if len(suggested) != 0 else None

In [17]:
def is_nan(x):
    return (x != x)

for ix in range(0, len(personilized_prediction)):
    per_vessel = personilized_prediction[ix]
    res = actual_logic_func(per_vessel["suggested_dest"], per_vessel["found_destinations"])
    personilized_prediction[ix]["new"] = res

In [26]:
personilized_prediction[0]["ts"].isoformat()

'2021-09-05T19:54:40'

In [19]:
# count = 0
# bigger_len = 0
# for vessel in personilized_prediction:
#     if vessel["new"]:
#         blah = str(vessel["new"])
#         if blah != "[nan]" and count < 50:
#             count += 1
#             print(vessel["new"])

## write results

In [33]:
def serialize_timestamp(obj):
    if isinstance(obj, pd.Timestamp):
        return obj.isoformat()
    raise TypeError

In [34]:
with open("personalized.json", mode="w") as f:
    json.dump(personilized_prediction, f, default=serialize_timestamp)

# read data to pandas

In [24]:
def deserialize_timestamp(obj):
    if isinstance(obj, str) and "-" in obj and ":" in obj and "T" in obj:
        return pd.Timestamp(obj)
    return obj

In [25]:
with open("personalized.json", mode="r") as f:
    raw_personilized_prediction = json.load(f)

In [26]:
raw_personilized_prediction[0]

{'vessel': '513a7b806ca0bbcb46a5a1e3',
 'reported': 'JP SBK',
 'suggested_dest': '5358fc78b68ca120a07dbbc9',
 'real_dest': '5358fc78b68ca120a07dbbc9',
 'match': True,
 'ts': '2021-09-05T19:54:40',
 'found_destinations': [{'baseline': '5358fc78b68ca120a07dbbc9',
   'actual_pc': '5358fc78b68ca120a07dbbc9'},
  {'baseline': '5358fc78b68ca120a07dbbc9',
   'actual_pc': '5358fc78b68ca120a07dbbc9'},
  {'baseline': '5358fc78b68ca120a07dbbc9',
   'actual_pc': '5358fc78b68ca120a07dbcbb'},
  {'baseline': '5358fc78b68ca120a07dbbc9',
   'actual_pc': '5358fc78b68ca120a07dbcbb'},
  {'baseline': '5358fc78b68ca120a07dbbc9',
   'actual_pc': '5358fc78b68ca120a07dbbc9'}],
 'new': {'baseline': 3, '5358fc78b68ca120a07dbcbb': 2}}

In [43]:
personilized_prediction_df = pd.DataFrame(raw_personilized_prediction)

In [44]:
personilized_prediction_df[TS] = pd.to_datetime(personilized_prediction_df[TS],format='%Y-%m-%dT%H:%M:%S')

In [45]:
personilized_prediction_df[:2]

Unnamed: 0,vessel,reported,suggested_dest,real_dest,match,ts,found_destinations,new
0,513a7b806ca0bbcb46a5a1e3,JP SBK,5358fc78b68ca120a07dbbc9,5358fc78b68ca120a07dbbc9,True,2021-09-05 19:54:40,"[{'baseline': '5358fc78b68ca120a07dbbc9', 'act...","{'baseline': 3, '5358fc78b68ca120a07dbcbb': 2}"
1,513a7b806ca0bbcb46a5a1e3,JP OSA H,5358fc78b68ca120a07dbcbb,5358fc78b68ca120a07dbcbb,True,2021-09-08 07:23:00,"[{'baseline': '5358fc78b68ca120a07dbcbb', 'act...",{'baseline': 1}


## what to return

In [46]:
def match_best_prediction(suggested_dest, predicted_list: dict):
    max_visits_count = 0
    max_visits_polygon = None
    if predicted_list is None:
        return None
    
    for polygon, count in predicted_list.items():
        if polygon == "baseline":
            return suggested_dest
        else:
            if count > max_visits_count:
                max_visits_count = count
                max_visits_polygon = polygon
    return max_visits_polygon


def match_best_prediction_without_baseline(suggested_dest, predicted_list: dict):
    max_visits_count = 0
    max_visits_polygon = None
    if predicted_list is None:
        return None
    
    for polygon, count in predicted_list.items():
        if count > max_visits_count:
            max_visits_count = count
            max_visits_polygon = polygon
    return max_visits_polygon if max_visits_polygon != "baseline" else suggested_dest


In [47]:
personilized_prediction_df["new_suggestion"] = personilized_prediction_df[["suggested_dest", "new"]].apply(
    lambda x: match_best_prediction(x["suggested_dest"], x["new"]), axis=1)

In [48]:
personilized_prediction_df["without_baseline"] = personilized_prediction_df[["suggested_dest", "new"]].apply(
    lambda x: match_best_prediction_without_baseline(x["suggested_dest"], x["new"]), axis=1)

In [60]:
clean_personilized_prediction_df = personilized_prediction_df[personilized_prediction_df.apply(lambda x: x["new"] is not None, axis=1)]

In [50]:
personilized_prediction_df = personilized_prediction_df.rename(columns={"suggested_dest": "bl_dest", 
                                           "new_suggestion": "suggested_dest", 
                                           "without_baseline": "suggested_no_bl_dest",
                                           "match": "bl_match"})

In [52]:
personilized_prediction_df["suggested_match"] = personilized_prediction_df["suggested_dest"] == personilized_prediction_df["real_dest"]
personilized_prediction_df["bl_match"] = personilized_prediction_df["bl_dest"] == personilized_prediction_df["real_dest"]
personilized_prediction_df["suggested_no_base_match"] = personilized_prediction_df["suggested_no_bl_dest"] == personilized_prediction_df["real_dest"]
personilized_prediction_df["bl_suggested"] = personilized_prediction_df["suggested_dest"] == personilized_prediction_df["bl_dest"]

In [53]:
personilized_prediction_df

Unnamed: 0,vessel,reported,bl_dest,real_dest,bl_match,ts,found_destinations,new,suggested_dest,suggested_no_bl_dest,suggested_match_2,suggested_no_base_match,bl_suggested,suggested_match
0,513a7b806ca0bbcb46a5a1e3,JP SBK,5358fc78b68ca120a07dbbc9,5358fc78b68ca120a07dbbc9,True,2021-09-05 19:54:40,"[{'baseline': '5358fc78b68ca120a07dbbc9', 'act...","{'baseline': 3, '5358fc78b68ca120a07dbcbb': 2}",5358fc78b68ca120a07dbbc9,5358fc78b68ca120a07dbbc9,True,True,True,True
1,513a7b806ca0bbcb46a5a1e3,JP OSA H,5358fc78b68ca120a07dbcbb,5358fc78b68ca120a07dbcbb,True,2021-09-08 07:23:00,"[{'baseline': '5358fc78b68ca120a07dbcbb', 'act...",{'baseline': 1},5358fc78b68ca120a07dbcbb,5358fc78b68ca120a07dbcbb,True,True,True,True
2,513a7b806ca0bbcb46a5a1e3,JP UKB,5358fc78b68ca120a07dbc9e,5358fc78b68ca120a07dbc9e,True,2021-09-08 19:54:40,"[{'baseline': '5358fc78b68ca120a07dbc9e', 'act...",{'baseline': 4},5358fc78b68ca120a07dbc9e,5358fc78b68ca120a07dbc9e,True,True,True,True
3,513a7b806ca0bbcb46a5a1e3,KR PUS,5358fc78b68ca120a07dbc71,5358fc78b68ca120a07dbc71,True,2021-09-09 13:45:35,"[{'baseline': '5358fc78b68ca120a07dbc71', 'act...",{'baseline': 3},5358fc78b68ca120a07dbc71,5358fc78b68ca120a07dbc71,True,True,True,True
4,513a7b806ca0bbcb46a5a1e3,CN SHA,5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07dbcfb,True,2021-09-11 05:47:44,"[{'baseline': '5358fc78b68ca120a07dbcfb', 'act...",{'baseline': 15},5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07dbcfb,True,True,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547738,62d6e20e6e5c10c029df7f37,IDSRI,5358fc78b68ca120a07db74f,5358fc78b68ca120a07db74f,True,2022-08-07 13:50:38,[],,,,False,False,False,False
547739,62d6e20e6e5c10c029df7f37,ID SUB,5358fc78b68ca120a07dbc01,5358fc78b68ca120a07dbc01,True,2022-08-12 07:45:02,"[{'baseline': '5358fc78b68ca120a07dbc01', 'act...",{'baseline': 19},5358fc78b68ca120a07dbc01,5358fc78b68ca120a07dbc01,True,True,True,True
547740,62d6e20e6e5c10c029df7f37,ID SRI,5358fc78b68ca120a07db74f,5358fc78b68ca120a07db74f,True,2022-08-15 08:11:07,"[{'baseline': '5358fc78b68ca120a07db74f', 'act...","{'baseline': 21, '5358fc78b68ca120a07dbc01': 5}",5358fc78b68ca120a07db74f,5358fc78b68ca120a07db74f,True,True,True,True
547741,62d6e20e6e5c10c029df7f37,ID SUB,5358fc78b68ca120a07dbc01,5358fc78b68ca120a07dbc01,True,2022-08-20 07:59:57,"[{'baseline': '5358fc78b68ca120a07dbc01', 'act...",{'baseline': 20},5358fc78b68ca120a07dbc01,5358fc78b68ca120a07dbc01,True,True,True,True


In [108]:
personilized_prediction_df[personilized_prediction_df["bl_match"] == False][:40]

Unnamed: 0,vessel,reported,bl_dest,real_dest,bl_match,ts,found_destinations,new,suggested_dest,suggested_no_bl_dest,suggested_match,suggested_no_base_match,bl_suggested
7,513a7b806ca0bbcb46a5a1e3,KH SHV,5358fc78b68ca120a07db7cb,54880c9b2f16092e9efe8a43,False,2021-09-23 05:49:21,"[{'baseline': '5358fc78b68ca120a07db7cb', 'act...","{'54880c9b2f16092e9efe8a3f': 1, 'baseline': 5}",5358fc78b68ca120a07db7cb,5358fc78b68ca120a07db7cb,False,False,True
51,513a7b806ca0bbcb46a5a1e3,CN NGB,5358fc78b68ca120a07db9f4,5358fc78b68ca120a07dbcfb,False,2022-01-02 16:23:28,"[{'baseline': '5358fc78b68ca120a07db9f4', 'act...","{'5358fc78b68ca120a07dbcfb': 1, 'baseline': 8}",5358fc78b68ca120a07db9f4,5358fc78b68ca120a07db9f4,False,False,True
64,513a7b806ca0bbcb46a5a1e3,TH LCH,54880c9b2f16092e9efe8a43,5358fc77b68ca120a07db205,False,2022-02-01 00:00:00,"[{'baseline': '54880c9b2f16092e9efe8a43', 'act...",{'baseline': 14},54880c9b2f16092e9efe8a43,54880c9b2f16092e9efe8a43,False,False,True
83,513a7b806ca0bbcb46a5a1e3,CN SHA,5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07db9f4,False,2022-03-16 04:59:07,"[{'baseline': '5358fc78b68ca120a07dbcfb', 'act...",{'baseline': 15},5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07dbcfb,False,False,True
99,513a7b806ca0bbcb46a5a1e3,VN SGN,5358fc78b68ca120a07dbaa9,5358fc78b68ca120a07db7cb,False,2022-05-01 00:00:00,"[{'baseline': '5358fc78b68ca120a07dbaa9', 'act...","{'baseline': 21, '5358fc78b68ca120a07dbc52': 1}",5358fc78b68ca120a07dbaa9,5358fc78b68ca120a07dbaa9,False,False,True
118,513a7b806ca0bbcb46a5a1e3,JP TYN,5358fc78b68ca120a07dbcff,5358fc78b68ca120a07dbd00,False,2022-06-17 10:17:20,[],,,,False,False,False
119,513a7b806ca0bbcb46a5a1e3,JP TYO CW,5358fc78b68ca120a07dbcff,5358fc78b68ca120a07dbd00,False,2022-06-17 10:35:19,"[{'baseline': '5358fc78b68ca120a07dbcff', 'act...",{'baseline': 3},5358fc78b68ca120a07dbcff,5358fc78b68ca120a07dbcff,False,False,True
169,5153880066556bcfc5c43d11,JIANGYIN,537ca63f72ef82043d62100f,57c539e9447c1ee77504c390,False,2021-09-26 08:55:14,"[{'baseline': '537ca63f72ef82043d62100f', 'act...",{'57c539e9447c1ee77504c390': 24},57c539e9447c1ee77504c390,57c539e9447c1ee77504c390,True,True,False
178,5153880066556bcfc5c43d11,JIANGYIN,537ca63f72ef82043d62100f,57c539e9447c1ee77504c390,False,2021-10-04 08:38:24,"[{'baseline': '537ca63f72ef82043d62100f', 'act...",{'57c539e9447c1ee77504c390': 25},57c539e9447c1ee77504c390,57c539e9447c1ee77504c390,True,True,False
189,5153880066556bcfc5c43d11,JIANGYIN,537ca63f72ef82043d62100f,57c539e9447c1ee77504c390,False,2021-10-16 07:43:17,"[{'baseline': '537ca63f72ef82043d62100f', 'act...",{'57c539e9447c1ee77504c390': 26},57c539e9447c1ee77504c390,57c539e9447c1ee77504c390,True,True,False


In [74]:
port_names_df = pd.DataFrame([{"port_name": "Savannah", "polygon": "5358fc78b68ca120a07dbbdc"}, 
                              {"port_name": "SanDiego", "polygon": "5358fc78b68ca120a07dbcf8"},
                              {"port_name": "Manzanillo_mexico", "polygon": "5358fc78b68ca120a07dbb1c"},
                              {"port_name": "Manzanillo_panama", "polygon": "5fe89bca62e5fd434e77136f"}])

In [152]:
port_names_df

Unnamed: 0,port_name,polygon
0,Savannah,5358fc78b68ca120a07dbbdc
1,SanDiego,5358fc78b68ca120a07dbcf8
2,Manzanillo_mexico,5358fc78b68ca120a07dbb1c
3,Manzanillo_panama,5fe89bca62e5fd434e77136f


In [130]:
example_1_df = personilized_prediction_df[(personilized_prediction_df["bl_match"] == False) & (personilized_prediction_df["suggested_match"] == True) & (personilized_prediction_df["vessel"] == "5905c34ca696e258790228f3")][["ts", "vessel", "reported", "bl_dest", "real_dest", "suggested_dest", "suggested_match", "bl_match"]]

In [80]:
manzanillo_example_df = personilized_prediction_df[(personilized_prediction_df["real_dest"] == "5358fc78b68ca120a07dbb1c") & 
                           (personilized_prediction_df["bl_dest"] == "5fe89bca62e5fd434e77136f") & 
                           (personilized_prediction_df["vessel"] == "5e1ee618b6f99132279a307f") & 
                           (personilized_prediction_df["suggested_match"] == True) & 
                          (personilized_prediction_df["bl_match"] == False)][["ts", "vessel", "reported", "bl_dest", "real_dest", "suggested_dest", "suggested_match", "bl_match"]]

In [83]:
manzanillo_example_df.merge(port_names_df, left_on=["bl_dest"], right_on=[
                            "polygon"]).rename(columns={"port_name": "bl_name"}).merge(
    port_names_df, left_on=["real_dest"], right_on=["polygon"]).rename(columns={"port_name": "real_name"}).merge(
    port_names_df, left_on=["suggested_dest"], right_on=["polygon"]).rename(columns={"port_name": "suggested_name"})[
    ["ts", "vessel", "reported", "bl_name", "real_name", "suggested_name", "bl_match", "suggested_match"]
].sort_values(TS)

Unnamed: 0,ts,vessel,reported,bl_name,real_name,suggested_name,bl_match,suggested_match
0,2022-02-01,5e1ee618b6f99132279a307f,MANZANILLO,Manzanillo_panama,Manzanillo_mexico,Manzanillo_mexico,False,True


In [150]:
manzanillo_example_df

Unnamed: 0,ts,vessel,reported,bl_dest,real_dest,suggested_dest,suggested_match,bl_match
21337,2022-07-01 00:00:00,5905c346e07bb7588d9c76f7,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
92235,2022-07-01 00:00:00,5905c34b71379e57e146101f,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
96415,2022-08-01 00:00:00,5905c34bca83c24c8075c6db,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
96426,2022-08-25 00:22:25,5905c34bca83c24c8075c6db,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
100549,2021-09-23 06:30:24,5905c34c0328b758c69a58e4,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
100550,2021-10-01 00:00:00,5905c34c0328b758c69a58e4,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
100554,2021-10-25 11:44:05,5905c34c0328b758c69a58e4,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
106892,2022-05-14 08:23:35,5905c34c4558b758c2f709cb,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
113676,2021-09-24 01:51:15,5905c34cd548704c81b04592,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False
113677,2021-10-01 00:00:00,5905c34cd548704c81b04592,MANZANILLO,5fe89bca62e5fd434e77136f,5358fc78b68ca120a07dbb1c,5358fc78b68ca120a07dbb1c,True,False


In [76]:
example_df = personilized_prediction_df[(personilized_prediction_df["bl_match"] == False) & (personilized_prediction_df["suggested_match"] == True) & (personilized_prediction_df["vessel"] == "59091202de94e903c6af3a2f")][["ts", "vessel", "reported", "bl_dest", "real_dest", "suggested_dest", "suggested_match", "bl_match"]]

In [77]:
example_df

Unnamed: 0,ts,vessel,reported,bl_dest,real_dest,suggested_dest,suggested_match,bl_match
462907,2021-10-05 12:44:46,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462911,2021-10-26 16:52:30,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462916,2021-11-17 07:38:01,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462921,2021-12-07 12:33:34,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462925,2021-12-28 12:21:34,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462926,2022-01-01 00:00:00,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462931,2022-01-18 12:55:12,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462936,2022-02-08 10:57:31,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462942,2022-03-01 13:03:34,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False
462946,2022-03-22 23:21:28,59091202de94e903c6af3a2f,US SAN,5358fc78b68ca120a07dbbdc,5358fc78b68ca120a07dbcf8,5358fc78b68ca120a07dbcf8,True,False


In [84]:
manzanillo_example_df.merge(port_names_df, left_on=["bl_dest"], right_on=[
                            "polygon"]).rename(columns={"port_name": "bl_name"}).merge(
    port_names_df, left_on=["real_dest"], right_on=["polygon"]).rename(columns={"port_name": "real_name"}).merge(
    port_names_df, left_on=["suggested_dest"], right_on=["polygon"]).rename(columns={"port_name": "suggested_name"})[
    ["ts", "vessel", "reported", "bl_name", "real_name", "suggested_name", "bl_match", "suggested_match"]
].sort_values(TS)

Unnamed: 0,ts,vessel,reported,bl_name,real_name,suggested_name,bl_match,suggested_match
0,2021-10-05 12:44:46,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
1,2021-10-26 16:52:30,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
2,2021-11-17 07:38:01,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
3,2021-12-07 12:33:34,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
4,2021-12-28 12:21:34,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
5,2022-01-01 00:00:00,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
6,2022-01-18 12:55:12,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
7,2022-02-08 10:57:31,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
8,2022-03-01 13:03:34,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
9,2022-03-22 23:21:28,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True


In [85]:
example_df.merge(port_names_df, left_on=["bl_dest"], right_on=[
                            "polygon"]).rename(columns={"port_name": "bl_name"}).merge(
    port_names_df, left_on=["real_dest"], right_on=["polygon"]).rename(columns={"port_name": "real_name"}).merge(
    port_names_df, left_on=["suggested_dest"], right_on=["polygon"]).rename(columns={"port_name": "suggested_name"})[
    ["ts", "vessel", "reported", "bl_name", "real_name", "suggested_name", "bl_match", "suggested_match"]
].sort_values(TS)

Unnamed: 0,ts,vessel,reported,bl_name,real_name,suggested_name,bl_match,suggested_match
0,2021-10-05 12:44:46,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
1,2021-10-26 16:52:30,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
2,2021-11-17 07:38:01,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
3,2021-12-07 12:33:34,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
4,2021-12-28 12:21:34,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
5,2022-01-01 00:00:00,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
6,2022-01-18 12:55:12,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
7,2022-02-08 10:57:31,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
8,2022-03-01 13:03:34,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True
9,2022-03-22 23:21:28,59091202de94e903c6af3a2f,US SAN,Savannah,SanDiego,SanDiego,False,True


## check matches

In [None]:
# 1

In [55]:
print("Matches between logic and actual PC when baseline is supported:")
personilized_prediction_df["≈"].value_counts(normalize=True)

Matches between logic and actual PC when baseline is supported:


True     0.667344
False    0.332656
Name: suggested_match, dtype: float64

In [57]:
print("Matches between logic and actual PC when baseline is NOT supported:")
personilized_prediction_df["suggested_no_base_match"].value_counts(normalize=True)

Matches between logic and actual PC when baseline is NOT supported:


True     0.665611
False    0.334389
Name: suggested_no_base_match, dtype: float64

In [98]:
clean_personilized_prediction_df["bl_suggested"].value_counts(normalize=True)

True     0.937386
False    0.062614
Name: bl_suggested, dtype: float64

In [61]:
clean_personilized_prediction_df["bl_match"].value_counts(normalize=True)

True     0.86056
False    0.13944
Name: bl_match, dtype: float64

In [64]:
clean_personilized_prediction_df["suggested_match"].value_counts(normalize=True)

True     0.895763
False    0.104237
Name: suggested_match, dtype: float64

In [65]:
clean_personilized_prediction_df["suggested_no_base_match"].value_counts(normalize=True)

True     0.893437
False    0.106563
Name: suggested_no_base_match, dtype: float64

In [None]:
# 2

In [66]:
not_bl_df = clean_personilized_prediction_df[personilized_prediction_df["bl_suggested"] == False]

  """Entry point for launching an IPython kernel.


In [67]:
not_bl_df["suggested_match"].value_counts(normalize=True)

True     0.714962
False    0.285038
Name: suggested_match, dtype: float64

In [59]:
# 3

In [73]:
clean_personilized_prediction_df[clean_personilized_prediction_df["suggested_match"] == True].value_counts(
    ["bl_suggested"], normalize=True)

bl_suggested
True            0.950024
False           0.049976
dtype: float64

In [68]:
clean_personilized_prediction_df.value_counts(["bl_suggested", "suggested_match", "bl_match"], normalize=True)

bl_suggested  suggested_match  bl_match
True          True             True        0.850996
              False            False       0.086390
False         True             False       0.044767
              False            True        0.009565
                               False       0.008283
dtype: float64

In [100]:
clean_personilized_prediction_df[(clean_personilized_prediction_df["bl_suggested"] == True) & (
    clean_personilized_prediction_df["suggested_match"] == False)][["vessel", "reported", "bl_dest", "real_dest", ]][:30]

Unnamed: 0,vessel,reported,bl_dest,real_dest,bl_match,ts,found_destinations,new,suggested_dest,suggested_no_bl_dest,suggested_match_2,suggested_no_base_match,bl_suggested,suggested_match
7,513a7b806ca0bbcb46a5a1e3,KH SHV,5358fc78b68ca120a07db7cb,54880c9b2f16092e9efe8a43,False,2021-09-23 05:49:21,"[{'baseline': '5358fc78b68ca120a07db7cb', 'act...","{'54880c9b2f16092e9efe8a3f': 1, 'baseline': 5}",5358fc78b68ca120a07db7cb,5358fc78b68ca120a07db7cb,False,False,True,False
51,513a7b806ca0bbcb46a5a1e3,CN NGB,5358fc78b68ca120a07db9f4,5358fc78b68ca120a07dbcfb,False,2022-01-02 16:23:28,"[{'baseline': '5358fc78b68ca120a07db9f4', 'act...","{'5358fc78b68ca120a07dbcfb': 1, 'baseline': 8}",5358fc78b68ca120a07db9f4,5358fc78b68ca120a07db9f4,False,False,True,False
64,513a7b806ca0bbcb46a5a1e3,TH LCH,54880c9b2f16092e9efe8a43,5358fc77b68ca120a07db205,False,2022-02-01 00:00:00,"[{'baseline': '54880c9b2f16092e9efe8a43', 'act...",{'baseline': 14},54880c9b2f16092e9efe8a43,54880c9b2f16092e9efe8a43,False,False,True,False
83,513a7b806ca0bbcb46a5a1e3,CN SHA,5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07db9f4,False,2022-03-16 04:59:07,"[{'baseline': '5358fc78b68ca120a07dbcfb', 'act...",{'baseline': 15},5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07dbcfb,False,False,True,False
99,513a7b806ca0bbcb46a5a1e3,VN SGN,5358fc78b68ca120a07dbaa9,5358fc78b68ca120a07db7cb,False,2022-05-01 00:00:00,"[{'baseline': '5358fc78b68ca120a07dbaa9', 'act...","{'baseline': 21, '5358fc78b68ca120a07dbc52': 1}",5358fc78b68ca120a07dbaa9,5358fc78b68ca120a07dbaa9,False,False,True,False
119,513a7b806ca0bbcb46a5a1e3,JP TYO CW,5358fc78b68ca120a07dbcff,5358fc78b68ca120a07dbd00,False,2022-06-17 10:35:19,"[{'baseline': '5358fc78b68ca120a07dbcff', 'act...",{'baseline': 3},5358fc78b68ca120a07dbcff,5358fc78b68ca120a07dbcff,False,False,True,False
212,5153880066556bcfc5c43d11,FUZHOU,57c539e9447c1ee77504c390,5358fc78b68ca120a07dbc52,False,2021-11-10 18:29:20,"[{'baseline': '57c539e9447c1ee77504c390', 'act...",{'baseline': 33},57c539e9447c1ee77504c390,57c539e9447c1ee77504c390,False,False,True,False
226,5153880066556bcfc5c43d11,FUZHOU,57c539e9447c1ee77504c390,5358fc78b68ca120a07dbc52,False,2021-11-28 07:28:19,"[{'baseline': '57c539e9447c1ee77504c390', 'act...","{'baseline': 32, '5358fc78b68ca120a07dbc52': 1}",57c539e9447c1ee77504c390,57c539e9447c1ee77504c390,False,False,True,False
228,5153880066556bcfc5c43d11,XIA MEN,5358fc78b68ca120a07dbc52,5358fc78b68ca120a07dbc99,False,2021-12-01 00:00:00,"[{'baseline': '5358fc78b68ca120a07dbc52', 'act...",{'baseline': 25},5358fc78b68ca120a07dbc52,5358fc78b68ca120a07dbc52,False,False,True,False
254,5153880066556bcfc5c43d11,XIAMEN,5358fc78b68ca120a07dbc52,5358fc78b68ca120a07dbc09,False,2022-01-01 00:00:00,"[{'baseline': '5358fc78b68ca120a07dbc52', 'act...","{'baseline': 69, '5358fc78b68ca120a07dbc09': 1}",5358fc78b68ca120a07dbc52,5358fc78b68ca120a07dbc52,False,False,True,False


In [None]:
clean_personilized_prediction_df.value_counts(["bl_suggested", "suggested_match", "bl_match"], normalize=True)

In [102]:
pd.crosstab(clean_personilized_prediction_df[["bl_match"]], clean_personilized_prediction_df[["suggested_match"]])

ValueError: Shape of passed values is (1, 2), indices imply (408069, 2)

In [118]:
# 4

In [117]:
personilized_prediction_df[(personilized_prediction_df["bl_match"] == False) & (
    personilized_prediction_df["suggested_match"] == False)
                          ]

Unnamed: 0,vessel,reported,bl_dest,real_dest,bl_match,ts,found_destinations,new,suggested_dest,suggested_no_bl_dest,suggested_match,suggested_no_base_match,bl_suggested
7,513a7b806ca0bbcb46a5a1e3,KH SHV,5358fc78b68ca120a07db7cb,54880c9b2f16092e9efe8a43,False,2021-09-23 05:49:21,"[{'baseline': '5358fc78b68ca120a07db7cb', 'act...","{'54880c9b2f16092e9efe8a3f': 1, 'baseline': 5}",5358fc78b68ca120a07db7cb,5358fc78b68ca120a07db7cb,False,False,True
51,513a7b806ca0bbcb46a5a1e3,CN NGB,5358fc78b68ca120a07db9f4,5358fc78b68ca120a07dbcfb,False,2022-01-02 16:23:28,"[{'baseline': '5358fc78b68ca120a07db9f4', 'act...","{'5358fc78b68ca120a07dbcfb': 1, 'baseline': 8}",5358fc78b68ca120a07db9f4,5358fc78b68ca120a07db9f4,False,False,True
64,513a7b806ca0bbcb46a5a1e3,TH LCH,54880c9b2f16092e9efe8a43,5358fc77b68ca120a07db205,False,2022-02-01 00:00:00,"[{'baseline': '54880c9b2f16092e9efe8a43', 'act...",{'baseline': 14},54880c9b2f16092e9efe8a43,54880c9b2f16092e9efe8a43,False,False,True
83,513a7b806ca0bbcb46a5a1e3,CN SHA,5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07db9f4,False,2022-03-16 04:59:07,"[{'baseline': '5358fc78b68ca120a07dbcfb', 'act...",{'baseline': 15},5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07dbcfb,False,False,True
99,513a7b806ca0bbcb46a5a1e3,VN SGN,5358fc78b68ca120a07dbaa9,5358fc78b68ca120a07db7cb,False,2022-05-01 00:00:00,"[{'baseline': '5358fc78b68ca120a07dbaa9', 'act...","{'baseline': 21, '5358fc78b68ca120a07dbc52': 1}",5358fc78b68ca120a07dbaa9,5358fc78b68ca120a07dbaa9,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
547714,62d6e20e6e5c10c029df7f37,ID SRI,5358fc78b68ca120a07db74f,5358fc78b68ca120a07dbc01,False,2022-04-16 05:20:30,"[{'baseline': '5358fc78b68ca120a07db74f', 'act...","{'baseline': 14, '5358fc78b68ca120a07dbc01': 1}",5358fc78b68ca120a07db74f,5358fc78b68ca120a07db74f,False,False,True
547715,62d6e20e6e5c10c029df7f37,ID SRI,5358fc78b68ca120a07db74f,5358fc78b68ca120a07dbc01,False,2022-05-01 00:00:00,"[{'baseline': '5358fc78b68ca120a07db74f', 'act...","{'baseline': 14, '5358fc78b68ca120a07dbc01': 2}",5358fc78b68ca120a07db74f,5358fc78b68ca120a07db74f,False,False,True
547716,62d6e20e6e5c10c029df7f37,ID SRI,5358fc78b68ca120a07db74f,5358fc78b68ca120a07dbc01,False,2022-05-03 00:00:00,"[{'baseline': '5358fc78b68ca120a07db74f', 'act...","{'baseline': 14, '5358fc78b68ca120a07dbc01': 3}",5358fc78b68ca120a07db74f,5358fc78b68ca120a07db74f,False,False,True
547733,62d6e20e6e5c10c029df7f37,ID SRI,5358fc78b68ca120a07db74f,5358fc78b68ca120a07dbc01,False,2022-07-04 00:27:42,"[{'baseline': '5358fc78b68ca120a07db74f', 'act...","{'baseline': 21, '5358fc78b68ca120a07dbc01': 4}",5358fc78b68ca120a07db74f,5358fc78b68ca120a07db74f,False,False,True


In [86]:
# 5

In [90]:
bl_match_df = clean_personilized_prediction_df[clean_personilized_prediction_df["bl_match"] == True][[
    "vessel", "reported", "bl_dest", "real_dest", "bl_match", TS, "suggested_dest"]]

In [91]:
suggested_match_df = clean_personilized_prediction_df[clean_personilized_prediction_df["suggested_match"] == True][[
    "vessel", "reported", "bl_dest", "real_dest", "bl_match", TS, "suggested_dest"]]

In [93]:
bl_match_df.merge(suggested_match_df, on=[
                  "vessel", "reported", TS, "bl_dest", "real_dest", "suggested_dest"], how="outer")[:50]

Unnamed: 0,vessel,reported,bl_dest,real_dest,bl_match_x,ts,suggested_dest,bl_match_y
0,513a7b806ca0bbcb46a5a1e3,JP SBK,5358fc78b68ca120a07dbbc9,5358fc78b68ca120a07dbbc9,True,2021-09-05 19:54:40,5358fc78b68ca120a07dbbc9,True
1,513a7b806ca0bbcb46a5a1e3,JP OSA H,5358fc78b68ca120a07dbcbb,5358fc78b68ca120a07dbcbb,True,2021-09-08 07:23:00,5358fc78b68ca120a07dbcbb,True
2,513a7b806ca0bbcb46a5a1e3,JP UKB,5358fc78b68ca120a07dbc9e,5358fc78b68ca120a07dbc9e,True,2021-09-08 19:54:40,5358fc78b68ca120a07dbc9e,True
3,513a7b806ca0bbcb46a5a1e3,KR PUS,5358fc78b68ca120a07dbc71,5358fc78b68ca120a07dbc71,True,2021-09-09 13:45:35,5358fc78b68ca120a07dbc71,True
4,513a7b806ca0bbcb46a5a1e3,CN SHA,5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07dbcfb,True,2021-09-11 05:47:44,5358fc78b68ca120a07dbcfb,True
5,513a7b806ca0bbcb46a5a1e3,CN HKG,5358fc78b68ca120a07dbce5,5358fc78b68ca120a07dbce5,True,2021-09-16 15:19:28,5358fc78b68ca120a07dbce5,True
6,513a7b806ca0bbcb46a5a1e3,VN SGN,5358fc78b68ca120a07dbaa9,5358fc78b68ca120a07dbaa9,True,2021-09-19 13:38:37,5358fc78b68ca120a07dbaa9,True
7,513a7b806ca0bbcb46a5a1e3,TH LCH,54880c9b2f16092e9efe8a43,54880c9b2f16092e9efe8a43,True,2021-09-25 07:13:04,54880c9b2f16092e9efe8a43,True
8,513a7b806ca0bbcb46a5a1e3,VN HPH,5358fc77b68ca120a07db205,5358fc77b68ca120a07db205,True,2021-09-27 05:19:22,5358fc77b68ca120a07db205,True
9,513a7b806ca0bbcb46a5a1e3,VN HPH,5358fc77b68ca120a07db205,5358fc77b68ca120a07db205,True,2021-10-01 00:00:00,5358fc77b68ca120a07db205,True


In [95]:
clean_personilized_prediction_df[(clean_personilized_prediction_df["bl_match"] == True) & (
    clean_personilized_prediction_df["suggested_match"] == False)]

Unnamed: 0,vessel,reported,bl_dest,real_dest,bl_match,ts,found_destinations,new,suggested_dest,suggested_no_bl_dest,suggested_match_2,suggested_no_base_match,bl_suggested,suggested_match
871,54878d00789f4e8acb25a84d,YANGSHAN,5358fc78b68ca120a07dbcfb,5358fc78b68ca120a07dbcfb,True,2021-10-27 01:33:47,"[{'baseline': '5358fc78b68ca120a07dbcfb', 'act...",{'5de771cb9cfeec59a0141541': 1},5de771cb9cfeec59a0141541,5de771cb9cfeec59a0141541,False,False,False,False
898,54878d00789f4e8acb25a84d,KING ABDULLAH,57459203000cad0168e18723,57459203000cad0168e18723,True,2022-02-01 00:00:00,"[{'baseline': '57459203000cad0168e18723', 'act...","{'54880a1a2f16092e9efe61e5': 4, '591852ef8d548...",54880a1a2f16092e9efe61e5,54880a1a2f16092e9efe61e5,False,False,False,False
1005,548e2480b4814836268bd428,GBFXT,5358fc78b68ca120a07dbc82,5358fc78b68ca120a07dbc82,True,2022-03-10 04:45:14,"[{'baseline': '5358fc78b68ca120a07dbc82', 'act...",{'5358fc78b68ca120a07dbcf7': 2},5358fc78b68ca120a07dbcf7,5358fc78b68ca120a07dbcf7,False,False,False,False
1122,54fe340096f422917a4fe00d,SINES,5358fc78b68ca120a07db7d1,5358fc78b68ca120a07db7d1,True,2021-10-24 08:44:25,"[{'baseline': '5358fc78b68ca120a07db7d1', 'act...","{'54880a1a2f16092e9efe61e5': 1, '5358fc78b68ca...",54880a1a2f16092e9efe61e5,54880a1a2f16092e9efe61e5,False,False,False,False
1254,551c8680061d1b443e31deeb,CO BUN,55b5d0da2f9be5a605f67b45,55b5d0da2f9be5a605f67b45,True,2022-02-01 00:00:00,"[{'baseline': '55b5d0da2f9be5a605f67b45', 'act...",{'54880a1a2f16092e9efe61e4': 3},54880a1a2f16092e9efe61e4,54880a1a2f16092e9efe61e4,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545780,604a1a5fba6708076e1017fa,SGSIN(PWBGA),591852ef8d54876cb3f80f2d,591852ef8d54876cb3f80f2d,True,2022-07-01 00:00:00,"[{'baseline': '591852ef8d54876cb3f80f2d', 'act...","{'54880a1a2f16092e9efe61e5': 2, '5358fc78b68ca...",54880a1a2f16092e9efe61e5,54880a1a2f16092e9efe61e5,False,False,False,False
546810,609153ee2e6583434c994910,EGPSD > MYPKG,5fe891a959431f434b3bbc55,5fe891a959431f434b3bbc55,True,2022-07-02 19:37:21,"[{'baseline': '5fe891a959431f434b3bbc55', 'act...",{'54880a1a2f16092e9efe61e5': 1},54880a1a2f16092e9efe61e5,54880a1a2f16092e9efe61e5,False,False,False,False
546822,609153ee2e6583434c994910,EGSUZ > FRDKK,5358fc78b68ca120a07dba53,5358fc78b68ca120a07dba53,True,2022-08-13 10:29:56,"[{'baseline': '5358fc78b68ca120a07dba53', 'act...","{'54880a1a2f16092e9efe61e5': 2, '5358fc78b68ca...",54880a1a2f16092e9efe61e5,54880a1a2f16092e9efe61e5,False,False,False,False
547007,60a3b5c4eae7152674710ef5,JP UKB RS,5358fc78b68ca120a07dbc9e,5358fc78b68ca120a07dbc9e,True,2022-07-03 19:31:15,"[{'baseline': '5358fc78b68ca120a07dbc9e', 'act...",{'53720b4f57b2d3980edf9bfa': 1},53720b4f57b2d3980edf9bfa,53720b4f57b2d3980edf9bfa,False,False,False,False


In [97]:
(3903 / len(clean_personilized_prediction_df

0.9564558934886012