In [1]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [3]:
# Load the feature table and re-index it by case identifier in one step.
dataset = pd.read_csv(
    "./Data/road_traffic/mined_rtfm_relabelled_confidences.csv",
    index_col=0,
).set_index('case:concept:name')

# Separate the target column from the feature matrix.
y = dataset['Class']
X = dataset.drop(columns=["Class"])
cols = X.columns.to_list()

# Quick sanity report: feature count and whether any value is missing.
print("No. of features:" + str(len(X.columns)))
print("Is na? " + str(X.isnull().values.any()))

# Encode the class labels as integers and keep the label -> code mapping.
le = LabelEncoder()
y_transformed = le.fit_transform(y)
le_name_mapping = pd.Series(dict(zip(le.classes_, le.transform(le.classes_))))

# Stratified 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_transformed,
    test_size=0.2,
    stratify=y_transformed,
    shuffle=True,
    random_state=0,
)

No. of features:2189
Is na? True


In [4]:
def calculateAverageDistance(prot_values,class_data_without_prot):
    """Return the mean Euclidean distance from one point to a set of points.

    Parameters
    ----------
    prot_values : 1-D array-like
        Feature vector of the reference point.
    class_data_without_prot : 2-D array-like
        One row per comparison point, with as many columns as ``prot_values``
        has entries.

    Returns
    -------
    float
        Mean of the Euclidean distances between ``prot_values`` and every row
        of ``class_data_without_prot``.
    """
    # cdist works on 2-D inputs, so present the single vector as a one-row matrix.
    reference_row = np.expand_dims(prot_values, axis=0)
    pairwise = cdist(reference_row, class_data_without_prot, metric='euclidean')
    return pairwise.mean()

In [5]:
# Timestamped folder of the run whose prototypes are being evaluated.
route = "07-21-2025_13-15-50"
ours_dir = "./results/Ours/" + route

# For every class, read the per-class result file and keep the case identifier
# stored under label 0 of its "case:concept:name" column.
(best_class0_our_prot,
 best_class1_our_prot,
 best_class2_our_prot,
 best_class3_our_prot) = [
    pd.read_csv(ours_dir + file_name)["case:concept:name"][0]
    for file_name in ("/best_predicted_instance_class0.csv",
                      "/best_predicted_instance_class1.csv",
                      "/best_predicted_instance_class2.csv",
                      "/best_predicted_instance_class3.csv")
]

In [6]:
# For every class, read the k-medoids result file and keep the case identifier
# stored under label 0 of its "case:concept:name" column.
(best_class0_kmedoids,
 best_class1_kmedoids,
 best_class2_kmedoids,
 best_class3_kmedoids) = [
    pd.read_csv(medoid_file)["case:concept:name"][0]
    for medoid_file in ("./results/Kmedoids/medoid_class0.csv",
                        "./results/Kmedoids/medoid_class1.csv",
                        "./results/Kmedoids/medoid_class2.csv",
                        "./results/Kmedoids/medoid_class3.csv")
]

In [7]:
# One (class label, k-medoids prototype id, our prototype id) triple per class.
prototypes = list(zip(
    range(4),
    (best_class0_kmedoids, best_class1_kmedoids, best_class2_kmedoids, best_class3_kmedoids),
    (best_class0_our_prot, best_class1_our_prot, best_class2_our_prot, best_class3_our_prot),
))

In [8]:
# The feature table contains missing values (the "Is na?" check prints True);
# left as NaN they would propagate into every Euclidean distance computed below,
# so they are replaced by a fixed sentinel.
# NOTE(review): -100 is presumably chosen to lie outside the range of real
# feature values; it directly influences the distances — confirm the choice.
MISSING_VALUE_FILL = -100
training_data_used = X_train.fillna(MISSING_VALUE_FILL)

In [9]:
def _cohesion_and_separation(prototype_id, class_members, other_members):
    """Score one prototype against its own class and against the other classes.

    Parameters
    ----------
    prototype_id
        Index label of the prototype row in ``training_data_used``.
    class_members : pandas.DataFrame
        Training rows of the prototype's class; must contain ``prototype_id``
        (``DataFrame.drop`` raises ``KeyError`` otherwise).
    other_members : pandas.DataFrame
        Training rows of every other class.

    Returns
    -------
    tuple of float
        ``(cohesion, separation)``: mean Euclidean distance to the remaining
        members of the prototype's class, and mean Euclidean distance to the
        rows of the other classes.
    """
    prototype_vector = training_data_used.loc[prototype_id].values
    # The prototype's own row is removed so its zero self-distance does not
    # pull the cohesion average down.
    peers = class_members.drop(prototype_id).values
    cohesion = calculateAverageDistance(prototype_vector, peers)
    separation = calculateAverageDistance(prototype_vector, other_members.values)
    return cohesion, separation


# Compare, class by class, the k-medoids prototype with ours.
for classProts, identifier_medoid, identifier_our_prot in prototypes:
    # Build the class mask once and reuse its negation for the other classes.
    is_class_member = y_train == classProts
    data_class_X = training_data_used[is_class_member]
    data_other_classes = training_data_used[~is_class_member]

    cohesionMedoid, sepMedoid = _cohesion_and_separation(
        identifier_medoid, data_class_X, data_other_classes)
    cohesionOurs, sepOurs = _cohesion_and_separation(
        identifier_our_prot, data_class_X, data_other_classes)

    print("Class "+ str(classProts)+":")
    print("Cohesion: Kmedoid prot:"+str(cohesionMedoid)+", Ours:"+str(cohesionOurs))
    print("Separation: Kmedoid prot:"+str(sepMedoid)+", Ours:"+str(sepOurs))

Class 0:
Cohesion: Kmedoid prot:108.76124630049367, Ours:3213.5269463770996
Separation: Kmedoid prot:4196.133347794332, Ours:5238.083972692399
Class 1:
Cohesion: Kmedoid prot:411.469896725199, Ours:3964.563191885667
Separation: Kmedoid prot:4362.376261878498, Ours:5857.095312142479
Class 2:
Cohesion: Kmedoid prot:797.3379379962704, Ours:931.6956005662787
Separation: Kmedoid prot:4304.17294981836, Ours:4301.978206367681
Class 3:
Cohesion: Kmedoid prot:1137.014173313287, Ours:1137.014173313287
Separation: Kmedoid prot:3449.1854232454675, Ours:3449.1854232454675
