# Phishing Network Code

In [64]:
import networkx as nx
import re, os, pickle, gzip, warnings, nltk
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

from node2vec import Node2Vec
from urllib.parse import urlparse
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from datetime import datetime
from tqdm import tqdm
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
from nltk.corpus import stopwords
from dotenv import load_dotenv

warnings.filterwarnings(action='ignore', category=UserWarning, module='gensim')
load_dotenv()
nltk.download('stopwords') # download stop words
stop_words = set(stopwords.words('english'))
PATH = os.environ["_path"]


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\gichu\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Undersampling to handle Class imbalances

In [76]:
df = pd.read_csv(PATH + "data.csv", names=['url', 'label'], skiprows=1)
print(df['label'].value_counts()) # intial label distribtution

df_bad = df[df['label'] == 'bad'][:5000]
df_good = df[df['label'] == 'good']

df_bad_undersampled= df_bad.sample(n=(len(df_bad)), random_state=42) # underample maj class
df_good_undersampled= df_good.sample(n=len(df_bad_undersampled), random_state=42) # underample maj class

df_balanced = pd.concat([df_bad_undersampled, df_good_undersampled], axis=0) # combine the df
df = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
df['label'].value_counts()


label
good    344821
bad      75643
Name: count, dtype: int64


label
good    5000
bad     5000
Name: count, dtype: int64

### constructing graph using networkx lib where nodes created: URL, Domain, Words
- also creating network embedding later to be used to measure distances

In [77]:
"""
This function segments URL into words
    - removes stop words based on length
    - removes special characters from query string
    - splist domain into components

Returns:
    A set of words used in the URL

"""
def segment_url(url):

    words = set()
    parsed = urlparse(url)
    
    # Segment the hostname
    if parsed.hostname:
        words.update(parsed.hostname.split('.'))

    # Segment the path using punctuation symbols
    if parsed.path:
        words.update(re.split(r'[\/.!&,#$%;&]', parsed.path))

    # Segment the query string
    if parsed.query:
        queries = parsed.query.split('&')
        for query in queries:
            key_value = query.split('=')
            if len(key_value) == 2:
                words.add(key_value[0])
                words.add(key_value[1])

    # Remove empty strings and stop words
    words = {word for word in words if word and len(word) > 1 and word not in stop_words}  
    return words

"""
This function to adds nodes and edges as defined in research paper:
edges between
    - URL -> DOMAIN
    - URL -> WORDS
    - DOMAIN -> authoritative name servers.

Returns:
    N/A
"""
def add_to_graph(g, url_entry):
    url = url_entry["url"] # fetch url
    if not url.startswith(("http://", "https://")): url = "http://" + url
    parsed = urlparse(url) # parse the url
    #print(parsed)

    # Create URL node
    url_node = f"{url}" # URL
    g.add_node(url_node, type="URL")
    #print(url)

    # adding edge from URL to DOMAIN
    if parsed.hostname:
        domain_node = f"{parsed.hostname}" # domain

        # Check if the domain node already exists in the graph, and add if not exist else reuse the domain
        if not g.has_node(domain_node):
            g.add_node(domain_node, type="Domain")  

        g.add_edge(url_node, domain_node)

    # Draw an edge between a URL (i.e., sentence) and a substring (i.e., word)
    # Segment URL into words and create nodes for each word
    words = segment_url(url)
    for word in words:
        word_node = f"{word}"
        g.add_node(word_node, type="Word") # word
        g.add_edge(url_node, word_node)
        
# visualise the graph plot
def visualise_graph(graph):
    pos = nx.spring_layout(graph)  # Define layout
    plt.figure(figsize=(16, 16))

    # Draw nodes with different colors based on their type
    url_nodes = [n for n, attr in graph.nodes(data=True) if attr.get('type') == 'URL']
    domain_nodes = [n for n, attr in graph.nodes(data=True) if attr.get('type') == 'Domain']
    word_nodes = [n for n, attr in graph.nodes(data=True) if attr.get('type') == 'Word']

    nx.draw_networkx_nodes(graph, pos, nodelist=url_nodes, node_size=400, node_color="blue", label="URLs")
    nx.draw_networkx_nodes(graph, pos, nodelist=domain_nodes, node_size=500, node_color="green", label="Domains")
    nx.draw_networkx_nodes(graph, pos, nodelist=word_nodes, node_size=200, node_color="red", label="Words")

    # Draw edges and labels
    nx.draw_networkx_edges(graph, pos, edge_color="gray", alpha=0.5)
    nx.draw_networkx_labels(graph, pos, font_size=10, font_color="black")

    plt.title("Graph Visualization")
    plt.legend(scatterpoints=1, loc="upper right", fontsize=10)
    plt.show()

def save_nodes_and_embeddings(g, dim=64, walk=10, n_walk=100, workers=4):

    # Create and save node2vec embeddings
    node2vec = Node2Vec(g, dimensions=dim, walk_length=walk, num_walks=n_walk, workers=workers)
    model = node2vec.fit()

    # save data to data/ dir
    with gzip.open(PATH + 'graph.gzpickle', 'wb') as f:
        pickle.dump(g, f)

    with gzip.open(PATH + "graph_embeddings.emb.gzpickle", 'wb') as f:
        pickle.dump(model.wv, f)  # Saving the word embeddings

    print("Graph complete.")

#  init Create graph ..
g = nx.Graph()
for idx, row in df.iterrows():
    add_to_graph(g, row)

save_nodes_and_embeddings(g)



Computing transition probabilities: 100%|██████████| 35275/35275 [01:36<00:00, 365.03it/s]


Graph complete.


# Belief Propogation Algorithm

In [78]:


class prediction:
    def __init__(self, type_emb, type_sim, type_compat, compat_threshold1, compat_threshold2, N_FOLDS, max_epochs):
            # type_compat = 'table1', 'table2', 'table3'
            # type_emb = None, 'deepwalk', 'node2vec', 'doc2vec', 'word2vec'
            # compat_threshold1, 2 = None, 0.3, 0.5, 0.7
            # type_sim = None, 'rbf', 'minmax', 'cos'
            # ct_type = 'ct1_2', 'ct1_3', 'ct1_4', 'ct2', 'ct3_2', 'ct3_3', 'ct3_4'
            self.type_emb = type_emb
            self.type_sim = type_sim
            self.type_compat = type_compat
            self.compat_threshold1 = compat_threshold1
            self.compat_threshold2 = compat_threshold2
            self.N_FOLDS = N_FOLDS
            self.max_epochs = max_epochs
            self.g = None


    def test_evasion(self, g, evasion_set):

        #print("start")
        for test_url in evasion_set:
            add_to_graph(g, test_url)

        save_nodes_and_embeddings(g)
        print("Test nodes added to Graph")
        

    """
    we are using dataset found in  https://raw.githubusercontent.com/faizann24/Using-machine-learning-to-detect-malicious-URLs/refs/heads/master/data/data.csv
    this does not use community truth but are labelled good/bad
        - we therefore remove community truths as wont be used
    """
    def main(self):
        
        print("Reading data ...")
        # read ground truth data and graph g
        g = pickle.load(gzip.open(PATH + 'graph.gzpickle', 'rb')) 
        self.g = g
        #url_truth = pd.read_csv(PATH + "subset_of_data.csv", names=['url', 'label'], skiprows=1)
        #data = url_truth['url'].to_list()

        url_truth = df
        data = url_truth['url'].to_list()
        has_ground_truth  = set(data)
        print("Done ...")


        kf = KFold(n_splits=self.N_FOLDS, shuffle=True)
        precision_sum = float(0)
        recall_sum = float(0)
        f1score_sum = float(0)
        accuracy_sum = float(0)

        print("Starting {}-fold cross-validation".format(self.N_FOLDS))
        for train, test in kf.split(data): # iterating through each fold

            # spliting into traniing and testing set for each fold
            training_set = set(np.array(data)[train])
            test_set = set(np.array(data)[test])

            # split training&test set to each class(rel/irrel)
            relevant_training = set()
            irrelevant_training = set()
            relevant_test = set()
            irrelevant_test = set()


            # node == url in the training set
            """
            
            """
            for node in training_set:

                # checking df for circumstances where urls match, returning label field, with first occurence (we assume one occurence of each url anyways)
                if url_truth.loc[url_truth['url'] == node, 'label'].values[0] == "bad":
                    relevant_training.add(node)
                elif  url_truth.loc[url_truth['url'] == node, 'label'].values[0] == "good":
                    irrelevant_training.add(node)
                else:
                    print("error: ground truth error")

            for node in test_set:
                if url_truth.loc[url_truth['url'] == node, 'label'].values[0] == "bad":
                    relevant_test.add(node)
                elif  url_truth.loc[url_truth['url'] == node, 'label'].values[0] == "good":
                    irrelevant_test.add(node)
                else:
                    print("error: ground truth error")

            
            # initilize node label
            for node in g.nodes():
                g.nodes[node]['label'] = None
                g.nodes[node]['best_label'] = -1
                g.nodes[node]['data_cost'] = [0.5, 0.5] # default beliefs about being phishy or beneign following polonium heurisitic

                # msg box is a dict
                g.nodes[node]['msgbox'] = {} # this will maintain a list of each message being passed to each neighbour
                g.nodes[node]['msg_comp'] = [0, 0] # initialising iysmessages to be passed onto the neighbours
                for nbr in list(g.neighbors(node)):
                    g.nodes[node]['msgbox'][nbr] = [0, 0] 
    
            """
            for each node belonging in the training set we want to give it a label where 
            according to the paper, hidden variables are not known entities thus do not have
            labels

            note that instead of 0,1, our dataset used good and bad labels. so bad = 1, good = 0
            """
            
            mal = 0 # malicious counter
            bn = 0 # beniegn counter
            #print(training_set)

            for node in has_ground_truth:

                # labelling training set and givinginit potentials
                if node in training_set:

                    
                    domain_node = node
                    # graph formatted to include https ../
                    if not domain_node.startswith(("http://", "https://")): 
                        domain_node = "http://" + node

                    #print(node, domain_node)

                    g.nodes[domain_node]['label'] = 1 if url_truth.loc[url_truth['url'] == node, 'label'].values[0] == "bad" else 0
                    if g.nodes[domain_node]['label'] == 1:      # malicious
                        g.nodes[domain_node]['data_cost'] = [0.99, 0.01] # we know is malicious so start at 0.99, 0.1
                        mal+=1
                    elif g.nodes[domain_node]['label'] == 0:    # benign
                        g.nodes[domain_node]['data_cost'] = [0.01, 0.99]
                        bn+=1

            print(mal,bn) # show split of malicious and benign nodes


            """ 
            SETTING distances for all edges
            so it looks like embeddings are loaded in seperately from the graph rather than embedding graph nodes themselves?, looks optional however
            """
            if self.type_emb == 'None':
                self.type_emb = None

            if self.type_emb != None:

                with gzip.open(PATH + "graph_embeddings.emb.gzpickle", 'rb') as f:
                    emb = pickle.load(f)

                min_dist = float("inf")
                max_dist = -float("inf")
                
                """
                calculating distance of nodes which will later be used to calc similarity in edge potentails, weher that is equclidian distance etc ..
                """
                for edge in g.edges():

                    if self.type_sim == 'minmax':
                        # euclidean distance
                        g.edges[edge]['distance'] = np.linalg.norm(emb[edge[0]] - emb[edge[1]])
                        if g.edges[edge]['distance'] > max_dist:
                            max_dist = g.edges[edge]['distance']
                        if g.edges[edge]['distance'] < min_dist:
                            min_dist = g.edges[edge]['distance']
                    elif self.type_sim == 'cos':
                        # cosine similarity
                        g.edges[edge]['sim'] = (np.dot(emb[edge[0]], emb[edge[1]]) / (np.linalg.norm(emb[edge[0]]) * np.linalg.norm(emb[edge[1]])))
                        g.edges[edge]['distance'] = 1 - g.edges[edge]['sim']
                    elif self.type_sim == 'rbf':
                        # euclidean distance
                        g.edges[edge]['distance'] = np.linalg.norm(emb[edge[0]] - emb[edge[1]])
                        # rbf sim (see: https://en.wikipedia.org/wiki/Radial_basis_function_kernel )
                        g.edges[edge]['sim'] = np.exp((-1.0 / 2.0) * np.power(g.edges[edge]['distance'], 2.0))

                if self.type_sim == 'minmax':
                    for edge in g.edges():
                        g.edges[edge]['sim'] = 1 - np.divide((g.edges[edge]['distance'] - min_dist), max_dist-min_dist)
                
                print("embedding done")
            # if type_emb not provided...
            else:
                # set initial messages
                for edge in g.edges():
                    #g.edges[edge]['msg'] = [0, 0]
                    g.edges[edge]['distance'] = 1.0
                    g.edges[edge]['sim'] = 0.5

            print("Done.")

            """
            This is the actual belief propogation section of the algorithm, which loops for max_epochs
            number of iterations performing the following:
                - 
            """
            for epoch in range(self.max_epochs):
                precision = float(0)
                recall = float(0)
                f1score = float(0)
                accuracy = float(0)

                #visualise_graph(g)
                self.step(g)
                print("Iteration: {} MAP: {}".format(epoch + 1, self.MAP(g)))
                #visualisie_graph(g)


                #print(relevant_test, "\n")
                #print(g.nodes)

                relevant_correctness = 0
                relevant_incorrectness = 0
                for i in relevant_test:
                    if not i.startswith(("http://", "https://")): 
                        i = "http://" + i
                        
                    if g.nodes[i]['best_label'] == 1:
                        relevant_correctness += 1
                    else:
                        relevant_incorrectness += 1

                irrelevant_correctness = 0
                irrelevant_incorrectness = 0
                for i in irrelevant_test:
                    if not i.startswith(("http://", "https://")): 
                        i = "http://" + i
                        

                    if g.nodes[i]['best_label'] == 0:
                        irrelevant_correctness += 1
                    else:
                        irrelevant_incorrectness += 1

                print("rel_cor: " + str(relevant_correctness))
                print("rel_incor: " + str(relevant_incorrectness))
                print("irrel_cor: " + str(irrelevant_correctness))
                print("irrel_incor: " + str(irrelevant_incorrectness))

                print("Relevant Accuracy: {:.6}".format(relevant_correctness / (relevant_correctness + relevant_incorrectness)))
                print("Irrelevant Accuracy: {:.6}".format(irrelevant_correctness / (irrelevant_correctness + irrelevant_incorrectness)))

                if (relevant_correctness + irrelevant_incorrectness) == 0:
                    precision = float(0)
                else:
                    precision = relevant_correctness / (relevant_correctness + irrelevant_incorrectness)
                print("Precision: {:.6}".format(precision))

                if (relevant_correctness + relevant_incorrectness) == 0:
                    recall = float(0)
                else:
                    recall = relevant_correctness / (relevant_correctness + relevant_incorrectness)
                print("Recall: {:.6}".format(recall))

                if (precision + recall) == 0:
                    f1score = float(0)
                else:
                    f1score = 2 * precision * recall / (precision + recall)
                print("F1 score: {:.6}".format(f1score))

                accuracy = (relevant_correctness + irrelevant_correctness) / (relevant_correctness + relevant_incorrectness + irrelevant_correctness + irrelevant_incorrectness)
                print("Accuracy: {:.6}".format(accuracy))

            precision_sum += precision
            recall_sum += recall
            f1score_sum += f1score
            accuracy_sum += accuracy
            print()
        
        print("Done.")
        print()

        avg_precision = precision_sum / self.N_FOLDS
        avg_recall = recall_sum / self.N_FOLDS
        avg_f1 = f1score_sum / self.N_FOLDS
        avg_acc = accuracy_sum / self.N_FOLDS

        print("Averaged precision: {:.6}".format(avg_precision))
        print("Averaged recall: {:.6}".format(avg_recall))
        print("Averaged F1 score: {:.6}".format(avg_f1))
        print("Averaged accuracy: {:.6}".format(avg_acc))

        print("End: " + str(datetime.now()))



        ## conduct test on unseen evasions
        #self.test_evasion(g)

        return avg_precision, avg_recall, avg_f1, avg_acc

    """
    This function propogates message
    --------------------------------

    each step send a message from a node to its neighbours
        - dont sent a message to a labelled node as obesrved variables do not recieve messages
        - if sending from a labelled node use _send_msg_label, else _send_msg where the largest diff comes from how the message is calculated
    """
    def step(self, G):
        for n in tqdm(G.nodes(), desc="Propagate from vertices with label", mininterval=0.5): # tqdm inits a progress bar
            if G.nodes[n]['label'] != None:
                for nbr in G.neighbors(n):
                    # do not propagate to nodes with label
                    if G.nodes[nbr]['label'] == None:

                        #print("HIDDEN", nbr)
                        self._send_msg_label(G, n, nbr)
        #for n in tqdm(G.nodes(), desc="Compiling message boxes 1", mininterval=0.5):
        #    G.nodes[n]['msg_comp'] = [0, 0]
        #    for nbr in G.neighbors(n):
        #        G.nodes[n]['msg_comp'][0] += G.nodes[n]['msgbox'][nbr][0]
        #        G.nodes[n]['msg_comp'][1] += G.nodes[n]['msgbox'][nbr][1]
        for n in tqdm(G.nodes(), desc="Propagate from vertices without label", mininterval=0.5):
            if G.nodes[n]['label'] == None:
                for nbr in G.neighbors(n):
                    # do not propagate to nodes with label
                    if G.nodes[nbr]['label'] == None:
                        self._send_msg(G, n, nbr)
        #for n in tqdm(G.nodes(), desc="Compiling message boxes 2", mininterval=0.5):
        #    G.nodes[n]['msg_comp'] = [0, 0]
        #    for nbr in G.neighbors(n):
        #        G.nodes[n]['msg_comp'][0] += G.nodes[n]['msgbox'][nbr][0]
        #        G.nodes[n]['msg_comp'][1] += G.nodes[n]['msgbox'][nbr][1]

    """
    calculates the message for hidden variables/nodes
    """
    def _min_sum(self, G, _from, _to):
        eps = 0.001

        new_msg = [0] * 2
        for i in range(2):  # we only have 2 labels so far
            fromnode = G.nodes[_from]

            # initialize
            # related => label 1
            # not related => label 0
            p_not_related = 0
            p_related = 0

            # data cost
            #p_not_related += math.log(1 - fromnode['data_cost'][0])
            #p_related += math.log(1 - fromnode['data_cost'][1])
            p_not_related += fromnode['data_cost'][0]
            p_related += fromnode['data_cost'][1]

            #for nbr in G.neighbors(_from):
            #    if nbr == _to:
            #        continue
            #    p_not_related += fromnode['msgbox'][nbr][0]
            #    p_related += fromnode['msgbox'][nbr][1]
            p_not_related += fromnode['msg_comp'][0] - fromnode['msgbox'][_to][0]
            p_related += fromnode['msg_comp'][1] - fromnode['msgbox'][_to][1]

            # smoothness cost
            if self.type_compat == 'table1':
                # original (we think this version is for sum-product...)
                #p_not_related += 0.5 + eps if i == 0 else 0.5 - eps
                #p_related += 0.5 - eps if i == 0 else 0.5 + eps

                """ using Polonium based Heurisitic. chose of heuristic is likely dependent on similarity measures used"""
                p_not_related += 0.5 - eps if i == 0 else 0.5 + eps 
                p_related += 0.5 + eps if i == 0 else 0.5 - eps
            elif self.type_compat == 'table2':
                # original (this version works only when table2 && cos)
                #p_not_related += 0 if i == 0 else 1 - G[_from][_to]['distance']
                #p_related += 1 - G[_from][_to]['distance'] if i == 0 else 0
                #p_not_related += 0 if i == 0 else G[_from][_to]['sim']
                #p_related += G[_from][_to]['sim'] if i == 0 else 0
                p_not_related += 0 if i == 0 else G[_from][_to]['distance']
                p_related += G[_from][_to]['distance'] if i == 0 else 0
            elif self.type_compat == 'table3':
                # original (our sim are similarities -> same = 1 / completely different = 0)

                """
                EDGE POTENTIALS USING COMPATIBILY MATRIX DEFINED IN THE PAPER
                -------------------------------------------------------------
                        Phishy                       Benign
                Phishy  min(ths+, 1 - sim(x, y))     max(ths−, sim(x, y))
                Benign  max(ths−, sim(x, y))         min(ths+, 1 - sim(x, y))
                """
                p_not_related += np.min([self.compat_threshold1, 1 - G[_to][_from]['sim']]) if i == 0 else np.max([self.compat_threshold2, G[_to][_from]['sim']])
                p_related += np.max([self.compat_threshold2, G[_to][_from]['sim']]) if i == 0 else np.min([self.compat_threshold1, 1 - G[_to][_from]['sim']])
                
            new_msg[i] = min(p_not_related, p_related)
            #print(new_msg)

        # Normalization
        # new_msg = np.exp(new_msg) / np.sum(np.exp(new_msg))

        return new_msg

    """
    This function propogates messages from labelled nodes
    if from node is maliciious then msg = [1, 0] else benign is [0, 1]
    """
    def _send_msg_label(self, G, _from, _to):
        #print(f"FROM   {G.nodes[_from]}" )
        # if lable is given
        if G.nodes[_from]['label'] == 1:
            msg = [1, 0]
        elif G.nodes[_from]['label'] == 0:
            msg = [0, 1]
        else:
            # ct2 case
            msg = G.nodes[_from]['data_cost']

        to_node = G.nodes[_to]
        #print(msg, to_node)
        #print("\n", to_node['msg_comp'][0], to_node['msgbox'][_from][0])


        # subtract original msg
        to_node['msg_comp'][0] -= to_node['msgbox'][_from][0]
        to_node['msg_comp'][1] -= to_node['msgbox'][_from][1]
        #print("\n", to_node['msg_comp'][0], to_node['msgbox'][_from][0])

        # add new msg
        to_node['msg_comp'][0] += msg[0]
        to_node['msg_comp'][1] += msg[1]

        # orignal msg := new msg
        to_node['msgbox'][_from] = msg
        #print(f"FROM   {G.nodes[_from]}    TO     {G.nodes[_to]['msg_comp'][0]} \n", )


    def _send_msg(self, G, _from, _to, ):
        # label not given
        msg = self._min_sum(G, _from, _to)

        to_node = G.nodes[_to]
        # subtract original msg from from node
        to_node['msg_comp'][0] -= to_node['msgbox'][_from][0]
        to_node['msg_comp'][1] -= to_node['msgbox'][_from][1]
        
        # add new msg
        to_node['msg_comp'][0] += msg[0]
        to_node['msg_comp'][1] += msg[1]
        # orignal msg := new msg
        to_node['msgbox'][_from] = msg



    """
    This function is used to evalatue perfomance of belief propgoation
    """
    def MAP(self, G):
        n_wrong_label = 0
        n_correct_label = 0

        # remember the objective is to minimise costs
        for n in G.nodes():
            #print(G.nodes[n])
            nodedata = G.nodes[n]

            cost_not_related = 0
            cost_related = 0

            # data costs
            cost_not_related += nodedata['data_cost'][0]
            cost_related += nodedata['data_cost'][1]

            # msg comp
            cost_not_related += nodedata['msg_comp'][0]
            cost_related += nodedata['msg_comp'][1]

            if cost_related < cost_not_related:
                nodedata['best_label'] = 1
            else:
                nodedata['best_label'] = 0

            
            #print(cost_related, cost_not_related, nodedata['best_label'], nodedata['label'])

            # as we are only checking labelled nodes, only concerned with url nodes
            if (nodedata['label'] == 1 and nodedata['best_label'] == 0) or (nodedata['label'] == 0 and nodedata['best_label'] == 1) :
                #print("error2: wrong label!")
                n_wrong_label += 1
        
            elif (nodedata['label'] == 1 and nodedata['best_label'] == 1) or (nodedata['label'] == 0 and nodedata['best_label'] == 0):
                n_correct_label += 1
            else:

                # if improper labelling do not consider
                pass

        print("# wrong label: " + str(n_wrong_label))
        print("# correct label: " + str(n_correct_label))
        


        """
        energy in this case looks like some measure of distance or metric, whereby the larger indicates 
            - higher datacosts assigned per label
            - more misclassifications as there are more discrepancies between predicted "best label" and curr label
        """
        energy = 0
        for n in G.nodes():
            cur_label = G.nodes[n]['best_label']

            #energy += math.log(1 - G.node[n]['data_cost'][cur_label])
            energy += G.nodes[n]['data_cost'][cur_label]
            for nbr, eattr in G[n].items():
                energy += 0 if G.nodes[nbr]['best_label'] == cur_label else eattr['distance']

        return energy


    """
    visualse graph with edge weights and stuff
    """
    def visualise_graph(g):
        pos = nx.spring_layout(g, seed=42)  # Define layout for better aesthetics

        # Plot the nodes with different colors based on their type (URL, Domain, Word)
        url_nodes = [n for n, attr in g.nodes(data=True) if attr.get('type') == 'URL']
        domain_nodes = [n for n, attr in g.nodes(data=True) if attr.get('type') == 'Domain']
        word_nodes = [n for n, attr in g.nodes(data=True) if attr.get('type') == 'Word']

        # Draw nodes
        nx.draw_networkx_nodes(g, pos, nodelist=url_nodes, node_size=600, node_color="skyblue", label="URLs")
        nx.draw_networkx_nodes(g, pos, nodelist=domain_nodes, node_size=300, node_color="lightgreen", label="Domains")
        nx.draw_networkx_nodes(g, pos, nodelist=word_nodes, node_size=150, node_color="salmon", label="Words")

        # Draw edges with weights
        edge_weights = [g[u][v].get('distance', 1) for u, v in g.edges()]
        nx.draw_networkx_edges(g, pos, edgelist=g.edges(), width=edge_weights, alpha=0.7, edge_color="gray")

        """

        # Draw labels (for nodes)
        node_labels = {}
        for node, attr in g.nodes(data=True):
            # Only display node labels if the node has a 'label' attribute
            if 'label' in attr:
                node_labels[node] = attr['label']
        
        nx.draw_networkx_labels(g, pos, labels=node_labels, font_size=10, font_color="black")
        """

        # Optional: Display edge weights as labels on the graph
        #edge_labels = {(u, v): round(g[u][v].get('distance', 0), 2) for u, v in g.edges()}
        #nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels, font_size=8)
        plt.title("Graph Visualization with Node and Edge Labels")
        plt.legend(scatterpoints=1, loc="upper right", fontsize=10)
        plt.axis('off')  # Turn off the axis
        plt.show()


### Finding optimal param config using random grid search and saving best model

In [79]:

# parameter space
grid = {
    'type_sim': ['rbf', 'minmax', 'cos'],
    'type_compat': ['table1', 'table2', 'table3'],
    'compat_threshold1': [0.3, 0.5, 0.7],
    'compat_threshold2': [0.3, 0.5, 0.7],
    'N_FOLDS': [3 ,5, 7],
    'max_epochs': [3, 5]
}
# not including emb in param space as graph constructed outside of scope
type_emb = 'node2vec' 

# Random grid search
random_trails = 4  # Number of random configurations to try
best_score = float("-inf")
best_params = None
best_model = None
config_params = None

for _ in range(random_trails):

    params = {key: np.random.choice(values) for key, values in grid.items()} # select random param for key
    print(params)
    
    # train and eval using random param config
    p = prediction(type_emb, type_sim= params["type_sim"], type_compat= params["type_compat"], compat_threshold1= params["compat_threshold1"],
                    compat_threshold2= params["compat_threshold2"], N_FOLDS= params["N_FOLDS"], max_epochs= params["max_epochs"])
    
    avg_precision, avg_recall, avg_f1, avg_acc = p.main()
    score = (avg_precision +  avg_recall + avg_f1 +  avg_acc) / 4 # using average of all metrics to calc score
    
    if score > best_score:
        best_score = score
        best_params = (avg_precision, avg_recall, avg_f1, avg_acc)
        config_params = params
        best_model = p

print(f"Best Score: {best_score}")
print(f"Best config: {config_params}")
print(f"Best Average performance: {best_params}")

{'type_sim': 'cos', 'type_compat': 'table3', 'compat_threshold1': 0.7, 'compat_threshold2': 0.5, 'N_FOLDS': 5, 'max_epochs': 5}
Reading data ...
Done ...
Starting 5-fold cross-validation
3699 3998
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 573452.07it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 69407.95it/s]


# wrong label: 0
# correct label: 7697
Iteration: 1 MAP: 18558.82954427675
rel_cor: 876
rel_incor: 75
irrel_cor: 915
irrel_incor: 87
Relevant Accuracy: 0.921136
Irrelevant Accuracy: 0.913174
Precision: 0.909657
Recall: 0.921136
F1 score: 0.915361
Accuracy: 0.917051


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 509501.27it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70757.40it/s]


# wrong label: 0
# correct label: 7697
Iteration: 2 MAP: 18547.694704980422
rel_cor: 878
rel_incor: 73
irrel_cor: 916
irrel_incor: 86
Relevant Accuracy: 0.923239
Irrelevant Accuracy: 0.914172
Precision: 0.910788
Recall: 0.923239
F1 score: 0.916971
Accuracy: 0.918587


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 522265.19it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70942.81it/s]


# wrong label: 0
# correct label: 7697
Iteration: 3 MAP: 18545.0465546556
rel_cor: 878
rel_incor: 73
irrel_cor: 915
irrel_incor: 87
Relevant Accuracy: 0.923239
Irrelevant Accuracy: 0.913174
Precision: 0.909845
Recall: 0.923239
F1 score: 0.916493
Accuracy: 0.918075


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 533860.41it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 69949.42it/s]


# wrong label: 0
# correct label: 7697
Iteration: 4 MAP: 18545.0465546556
rel_cor: 878
rel_incor: 73
irrel_cor: 915
irrel_incor: 87
Relevant Accuracy: 0.923239
Irrelevant Accuracy: 0.913174
Precision: 0.909845
Recall: 0.923239
F1 score: 0.916493
Accuracy: 0.918075


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 489784.41it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 71422.16it/s]


# wrong label: 0
# correct label: 7697
Iteration: 5 MAP: 18563.873452991997
rel_cor: 870
rel_incor: 81
irrel_cor: 919
irrel_incor: 83
Relevant Accuracy: 0.914826
Irrelevant Accuracy: 0.917166
Precision: 0.912907
Recall: 0.914826
F1 score: 0.913866
Accuracy: 0.916027

3758 3927
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 528815.35it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 72265.99it/s]


# wrong label: 0
# correct label: 7685
Iteration: 1 MAP: 18564.92185900158
rel_cor: 823
rel_incor: 67
irrel_cor: 997
irrel_incor: 76
Relevant Accuracy: 0.924719
Irrelevant Accuracy: 0.929171
Precision: 0.915462
Recall: 0.924719
F1 score: 0.920067
Accuracy: 0.927152


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 493221.35it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 72227.01it/s]


# wrong label: 0
# correct label: 7685
Iteration: 2 MAP: 18552.599630033452
rel_cor: 826
rel_incor: 64
irrel_cor: 997
irrel_incor: 76
Relevant Accuracy: 0.92809
Irrelevant Accuracy: 0.929171
Precision: 0.915743
Recall: 0.92809
F1 score: 0.921875
Accuracy: 0.928681


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 470660.70it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 71200.67it/s]


# wrong label: 0
# correct label: 7685
Iteration: 3 MAP: 18552.007164871175
rel_cor: 826
rel_incor: 64
irrel_cor: 995
irrel_incor: 78
Relevant Accuracy: 0.92809
Irrelevant Accuracy: 0.927307
Precision: 0.913717
Recall: 0.92809
F1 score: 0.920847
Accuracy: 0.927662


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 437772.80it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 71719.73it/s]


# wrong label: 0
# correct label: 7685
Iteration: 4 MAP: 18552.007164871175
rel_cor: 826
rel_incor: 64
irrel_cor: 995
irrel_incor: 78
Relevant Accuracy: 0.92809
Irrelevant Accuracy: 0.927307
Precision: 0.913717
Recall: 0.92809
F1 score: 0.920847
Accuracy: 0.927662


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 478967.68it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 72157.65it/s]


# wrong label: 0
# correct label: 7685
Iteration: 5 MAP: 18572.074227725927
rel_cor: 819
rel_incor: 71
irrel_cor: 1005
irrel_incor: 68
Relevant Accuracy: 0.920225
Irrelevant Accuracy: 0.936626
Precision: 0.923337
Recall: 0.920225
F1 score: 0.921778
Accuracy: 0.92919

3677 4020
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 521953.82it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 71097.41it/s]


# wrong label: 0
# correct label: 7697
Iteration: 1 MAP: 18545.979487866898
rel_cor: 903
rel_incor: 70
irrel_cor: 916
irrel_incor: 64
Relevant Accuracy: 0.928058
Irrelevant Accuracy: 0.934694
Precision: 0.933816
Recall: 0.928058
F1 score: 0.930928
Accuracy: 0.931388


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 471161.31it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 71503.24it/s]


# wrong label: 0
# correct label: 7697
Iteration: 2 MAP: 18538.795356184986
rel_cor: 902
rel_incor: 71
irrel_cor: 917
irrel_incor: 63
Relevant Accuracy: 0.92703
Irrelevant Accuracy: 0.935714
Precision: 0.934715
Recall: 0.92703
F1 score: 0.930857
Accuracy: 0.931388


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 465346.33it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70792.47it/s]


# wrong label: 0
# correct label: 7697
Iteration: 3 MAP: 18536.6671456404
rel_cor: 902
rel_incor: 71
irrel_cor: 917
irrel_incor: 63
Relevant Accuracy: 0.92703
Irrelevant Accuracy: 0.935714
Precision: 0.934715
Recall: 0.92703
F1 score: 0.930857
Accuracy: 0.931388


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 477198.86it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70400.55it/s]


# wrong label: 0
# correct label: 7697
Iteration: 4 MAP: 18536.6671456404
rel_cor: 902
rel_incor: 71
irrel_cor: 917
irrel_incor: 63
Relevant Accuracy: 0.92703
Irrelevant Accuracy: 0.935714
Precision: 0.934715
Recall: 0.92703
F1 score: 0.930857
Accuracy: 0.931388


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 493336.47it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70612.86it/s]


# wrong label: 0
# correct label: 7697
Iteration: 5 MAP: 18563.49296388102
rel_cor: 891
rel_incor: 82
irrel_cor: 928
irrel_incor: 52
Relevant Accuracy: 0.915725
Irrelevant Accuracy: 0.946939
Precision: 0.944857
Recall: 0.915725
F1 score: 0.930063
Accuracy: 0.931388

3645 4032
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 513392.51it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70421.36it/s]


# wrong label: 0
# correct label: 7677
Iteration: 1 MAP: 18560.38508774712
rel_cor: 930
rel_incor: 72
irrel_cor: 911
irrel_incor: 57
Relevant Accuracy: 0.928144
Irrelevant Accuracy: 0.941116
Precision: 0.942249
Recall: 0.928144
F1 score: 0.935143
Accuracy: 0.934518


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 478780.13it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70330.67it/s]


# wrong label: 0
# correct label: 7677
Iteration: 2 MAP: 18542.2074094005
rel_cor: 932
rel_incor: 70
irrel_cor: 907
irrel_incor: 61
Relevant Accuracy: 0.93014
Irrelevant Accuracy: 0.936983
Precision: 0.93857
Recall: 0.93014
F1 score: 0.934336
Accuracy: 0.933503


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 447124.89it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70318.67it/s]


# wrong label: 0
# correct label: 7677
Iteration: 3 MAP: 18543.265656609095
rel_cor: 932
rel_incor: 70
irrel_cor: 908
irrel_incor: 60
Relevant Accuracy: 0.93014
Irrelevant Accuracy: 0.938017
Precision: 0.939516
Recall: 0.93014
F1 score: 0.934804
Accuracy: 0.93401


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 485098.50it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 70048.84it/s]


# wrong label: 0
# correct label: 7677
Iteration: 4 MAP: 18543.23345925287
rel_cor: 932
rel_incor: 70
irrel_cor: 908
irrel_incor: 60
Relevant Accuracy: 0.93014
Irrelevant Accuracy: 0.938017
Precision: 0.939516
Recall: 0.93014
F1 score: 0.934804
Accuracy: 0.93401


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 478394.66it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 69627.54it/s]


# wrong label: 0
# correct label: 7677
Iteration: 5 MAP: 18669.84508897726
rel_cor: 838
rel_incor: 164
irrel_cor: 929
irrel_incor: 39
Relevant Accuracy: 0.836327
Irrelevant Accuracy: 0.959711
Precision: 0.95553
Recall: 0.836327
F1 score: 0.891964
Accuracy: 0.896954

3689 4023
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 477559.28it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 72190.93it/s]


# wrong label: 0
# correct label: 7712
Iteration: 1 MAP: 18531.062938744486
rel_cor: 892
rel_incor: 87
irrel_cor: 912
irrel_incor: 65
Relevant Accuracy: 0.911134
Irrelevant Accuracy: 0.93347
Precision: 0.932079
Recall: 0.911134
F1 score: 0.921488
Accuracy: 0.92229


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 478818.87it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 73055.26it/s]


# wrong label: 0
# correct label: 7712
Iteration: 2 MAP: 18514.83943747945
rel_cor: 892
rel_incor: 87
irrel_cor: 913
irrel_incor: 64
Relevant Accuracy: 0.911134
Irrelevant Accuracy: 0.934493
Precision: 0.933054
Recall: 0.911134
F1 score: 0.921964
Accuracy: 0.922802


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 464523.77it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 72174.20it/s]


# wrong label: 0
# correct label: 7712
Iteration: 3 MAP: 18514.76322066255
rel_cor: 892
rel_incor: 87
irrel_cor: 913
irrel_incor: 64
Relevant Accuracy: 0.911134
Irrelevant Accuracy: 0.934493
Precision: 0.933054
Recall: 0.911134
F1 score: 0.921964
Accuracy: 0.922802


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 466210.00it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 72361.49it/s]


# wrong label: 0
# correct label: 7712
Iteration: 4 MAP: 18514.76322066255
rel_cor: 892
rel_incor: 87
irrel_cor: 913
irrel_incor: 64
Relevant Accuracy: 0.911134
Irrelevant Accuracy: 0.934493
Precision: 0.933054
Recall: 0.911134
F1 score: 0.921964
Accuracy: 0.922802


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 472426.55it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 72179.94it/s]


# wrong label: 0
# correct label: 7712
Iteration: 5 MAP: 18509.368684584584
rel_cor: 881
rel_incor: 98
irrel_cor: 919
irrel_incor: 58
Relevant Accuracy: 0.899898
Irrelevant Accuracy: 0.940635
Precision: 0.938232
Recall: 0.899898
F1 score: 0.918665
Accuracy: 0.920245

Done.

Averaged precision: 0.934973
Averaged recall: 0.8974
Averaged F1 score: 0.915267
Averaged accuracy: 0.918761
End: 2024-12-02 22:13:24.292372
{'type_sim': 'cos', 'type_compat': 'table3', 'compat_threshold1': 0.7, 'compat_threshold2': 0.7, 'N_FOLDS': 3, 'max_epochs': 5}
Reading data ...
Done ...
Starting 3-fold cross-validation
3111 3314
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 648418.00it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 44934.68it/s]


# wrong label: 0
# correct label: 6425
Iteration: 1 MAP: 19080.61380743917
rel_cor: 1412
rel_incor: 153
irrel_cor: 1592
irrel_incor: 94
Relevant Accuracy: 0.902236
Irrelevant Accuracy: 0.944247
Precision: 0.937583
Recall: 0.902236
F1 score: 0.91957
Accuracy: 0.924023


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 491566.27it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 45077.40it/s]


# wrong label: 0
# correct label: 6425
Iteration: 2 MAP: 19069.135746716827
rel_cor: 1410
rel_incor: 155
irrel_cor: 1589
irrel_incor: 97
Relevant Accuracy: 0.900958
Irrelevant Accuracy: 0.942467
Precision: 0.935634
Recall: 0.900958
F1 score: 0.917969
Accuracy: 0.922485


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 581196.67it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 45200.78it/s]


# wrong label: 0
# correct label: 6425
Iteration: 3 MAP: 19069.594441711277
rel_cor: 1412
rel_incor: 153
irrel_cor: 1589
irrel_incor: 97
Relevant Accuracy: 0.902236
Irrelevant Accuracy: 0.942467
Precision: 0.935719
Recall: 0.902236
F1 score: 0.918673
Accuracy: 0.923101


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 562907.61it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 44625.37it/s]


# wrong label: 0
# correct label: 6425
Iteration: 4 MAP: 19069.116946160168
rel_cor: 1412
rel_incor: 153
irrel_cor: 1589
irrel_incor: 97
Relevant Accuracy: 0.902236
Irrelevant Accuracy: 0.942467
Precision: 0.935719
Recall: 0.902236
F1 score: 0.918673
Accuracy: 0.923101


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 581495.91it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 44760.88it/s]


# wrong label: 0
# correct label: 6425
Iteration: 5 MAP: 21495.387467858935
rel_cor: 372
rel_incor: 1193
irrel_cor: 1635
irrel_incor: 51
Relevant Accuracy: 0.2377
Irrelevant Accuracy: 0.969751
Precision: 0.879433
Recall: 0.2377
F1 score: 0.374245
Accuracy: 0.617349

3115 3315
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 614710.58it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 44370.33it/s]


# wrong label: 0
# correct label: 6430
Iteration: 1 MAP: 19118.9476936928
rel_cor: 1435
rel_incor: 132
irrel_cor: 1573
irrel_incor: 112
Relevant Accuracy: 0.915763
Irrelevant Accuracy: 0.933531
Precision: 0.927602
Recall: 0.915763
F1 score: 0.921644
Accuracy: 0.924969


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 544555.42it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 44213.25it/s]


# wrong label: 0
# correct label: 6430
Iteration: 2 MAP: 19107.890328632966
rel_cor: 1440
rel_incor: 127
irrel_cor: 1571
irrel_incor: 114
Relevant Accuracy: 0.918953
Irrelevant Accuracy: 0.932344
Precision: 0.926641
Recall: 0.918953
F1 score: 0.922781
Accuracy: 0.925892


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 551374.11it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 44326.37it/s]


# wrong label: 0
# correct label: 6430
Iteration: 3 MAP: 19110.991382049688
rel_cor: 1441
rel_incor: 126
irrel_cor: 1568
irrel_incor: 117
Relevant Accuracy: 0.919592
Irrelevant Accuracy: 0.930564
Precision: 0.924904
Recall: 0.919592
F1 score: 0.92224
Accuracy: 0.925277


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 548404.59it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 44056.21it/s]


# wrong label: 0
# correct label: 6430
Iteration: 4 MAP: 19109.518855618608
rel_cor: 1441
rel_incor: 126
irrel_cor: 1569
irrel_incor: 116
Relevant Accuracy: 0.919592
Irrelevant Accuracy: 0.931157
Precision: 0.925498
Recall: 0.919592
F1 score: 0.922535
Accuracy: 0.925584


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 543090.24it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 43873.94it/s]


# wrong label: 0
# correct label: 6430
Iteration: 5 MAP: 21816.740188226577
rel_cor: 669
rel_incor: 898
irrel_cor: 1551
irrel_incor: 134
Relevant Accuracy: 0.42693
Irrelevant Accuracy: 0.920475
Precision: 0.833126
Recall: 0.42693
F1 score: 0.564557
Accuracy: 0.682657

3071 3371
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 605932.09it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 43355.63it/s]


# wrong label: 0
# correct label: 6442
Iteration: 1 MAP: 19116.276735586325
rel_cor: 1484
rel_incor: 135
irrel_cor: 1528
irrel_incor: 101
Relevant Accuracy: 0.916615
Irrelevant Accuracy: 0.937999
Precision: 0.936278
Recall: 0.916615
F1 score: 0.926342
Accuracy: 0.92734


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 560077.81it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 43787.77it/s]


# wrong label: 0
# correct label: 6442
Iteration: 2 MAP: 19099.85114906471
rel_cor: 1487
rel_incor: 132
irrel_cor: 1528
irrel_incor: 101
Relevant Accuracy: 0.918468
Irrelevant Accuracy: 0.937999
Precision: 0.936398
Recall: 0.918468
F1 score: 0.927346
Accuracy: 0.928264


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 517119.43it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 43366.01it/s]


# wrong label: 0
# correct label: 6442
Iteration: 3 MAP: 19096.790557724646
rel_cor: 1487
rel_incor: 132
irrel_cor: 1528
irrel_incor: 101
Relevant Accuracy: 0.918468
Irrelevant Accuracy: 0.937999
Precision: 0.936398
Recall: 0.918468
F1 score: 0.927346
Accuracy: 0.928264


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 535947.03it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 42983.07it/s]


# wrong label: 0
# correct label: 6442
Iteration: 4 MAP: 19093.412363213713
rel_cor: 1486
rel_incor: 133
irrel_cor: 1529
irrel_incor: 100
Relevant Accuracy: 0.917851
Irrelevant Accuracy: 0.938613
Precision: 0.936948
Recall: 0.917851
F1 score: 0.927301
Accuracy: 0.928264


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 501175.67it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 43629.44it/s]


# wrong label: 0
# correct label: 6442
Iteration: 5 MAP: 21454.96299014569
rel_cor: 366
rel_incor: 1253
irrel_cor: 1549
irrel_incor: 80
Relevant Accuracy: 0.226065
Irrelevant Accuracy: 0.95089
Precision: 0.820628
Recall: 0.226065
F1 score: 0.354479
Accuracy: 0.589594

Done.

Averaged precision: 0.844395
Averaged recall: 0.296899
Averaged F1 score: 0.431094
Averaged accuracy: 0.629866
End: 2024-12-02 22:14:13.717031
{'type_sim': 'cos', 'type_compat': 'table2', 'compat_threshold1': 0.7, 'compat_threshold2': 0.7, 'N_FOLDS': 7, 'max_epochs': 5}
Reading data ...
Done ...
Starting 7-fold cross-validation
3954 4281
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 582237.33it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 358271.80it/s]

# wrong label: 0
# correct label: 8235





Iteration: 1 MAP: 18147.164854800547
rel_cor: 469
rel_incor: 221
irrel_cor: 687
irrel_incor: 32
Relevant Accuracy: 0.67971
Irrelevant Accuracy: 0.955494
Precision: 0.936128
Recall: 0.67971
F1 score: 0.787573
Accuracy: 0.82044


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 498964.91it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 363231.69it/s]


# wrong label: 0
# correct label: 8235
Iteration: 2 MAP: 18143.649248874037
rel_cor: 473
rel_incor: 217
irrel_cor: 688
irrel_incor: 31
Relevant Accuracy: 0.685507
Irrelevant Accuracy: 0.956885
Precision: 0.938492
Recall: 0.685507
F1 score: 0.792295
Accuracy: 0.823989


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 490560.65it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 357583.42it/s]


# wrong label: 0
# correct label: 8235
Iteration: 3 MAP: 18142.601977145525
rel_cor: 474
rel_incor: 216
irrel_cor: 688
irrel_incor: 31
Relevant Accuracy: 0.686957
Irrelevant Accuracy: 0.956885
Precision: 0.938614
Recall: 0.686957
F1 score: 0.793305
Accuracy: 0.824698


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 503917.04it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 344317.86it/s]


# wrong label: 0
# correct label: 8235
Iteration: 4 MAP: 18142.601977145525
rel_cor: 474
rel_incor: 216
irrel_cor: 688
irrel_incor: 31
Relevant Accuracy: 0.686957
Irrelevant Accuracy: 0.956885
Precision: 0.938614
Recall: 0.686957
F1 score: 0.793305
Accuracy: 0.824698


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 499647.35it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 356431.67it/s]


# wrong label: 0
# correct label: 8235
Iteration: 5 MAP: 18143.289097046705
rel_cor: 472
rel_incor: 218
irrel_cor: 689
irrel_incor: 30
Relevant Accuracy: 0.684058
Irrelevant Accuracy: 0.958275
Precision: 0.940239
Recall: 0.684058
F1 score: 0.791946
Accuracy: 0.823989

3929 4307
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 511106.45it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 352558.69it/s]


# wrong label: 0
# correct label: 8236
Iteration: 1 MAP: 18135.83782415137
rel_cor: 491
rel_incor: 220
irrel_cor: 651
irrel_incor: 42
Relevant Accuracy: 0.690577
Irrelevant Accuracy: 0.939394
Precision: 0.921201
Recall: 0.690577
F1 score: 0.789389
Accuracy: 0.81339


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 443975.88it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 350348.86it/s]


# wrong label: 0
# correct label: 8236
Iteration: 2 MAP: 18132.04874996886
rel_cor: 491
rel_incor: 220
irrel_cor: 651
irrel_incor: 42
Relevant Accuracy: 0.690577
Irrelevant Accuracy: 0.939394
Precision: 0.921201
Recall: 0.690577
F1 score: 0.789389
Accuracy: 0.81339


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 431476.26it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 348011.77it/s]


# wrong label: 0
# correct label: 8236
Iteration: 3 MAP: 18131.90840677962
rel_cor: 492
rel_incor: 219
irrel_cor: 651
irrel_incor: 42
Relevant Accuracy: 0.691983
Irrelevant Accuracy: 0.939394
Precision: 0.921348
Recall: 0.691983
F1 score: 0.790361
Accuracy: 0.814103


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 468976.37it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 364030.66it/s]


# wrong label: 0
# correct label: 8236
Iteration: 4 MAP: 18131.90840677962
rel_cor: 492
rel_incor: 219
irrel_cor: 651
irrel_incor: 42
Relevant Accuracy: 0.691983
Irrelevant Accuracy: 0.939394
Precision: 0.921348
Recall: 0.691983
F1 score: 0.790361
Accuracy: 0.814103


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 481356.52it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 349309.95it/s]


# wrong label: 0
# correct label: 8236
Iteration: 5 MAP: 18132.293737099026
rel_cor: 491
rel_incor: 220
irrel_cor: 651
irrel_incor: 42
Relevant Accuracy: 0.690577
Irrelevant Accuracy: 0.939394
Precision: 0.921201
Recall: 0.690577
F1 score: 0.789389
Accuracy: 0.81339

3965 4287
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 510052.79it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 376663.24it/s]


# wrong label: 0
# correct label: 8252
Iteration: 1 MAP: 18134.903596837376
rel_cor: 456
rel_incor: 228
irrel_cor: 684
irrel_incor: 29
Relevant Accuracy: 0.666667
Irrelevant Accuracy: 0.959327
Precision: 0.940206
Recall: 0.666667
F1 score: 0.780154
Accuracy: 0.816034


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 449308.29it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 367497.37it/s]


# wrong label: 0
# correct label: 8252
Iteration: 2 MAP: 18132.94208182798
rel_cor: 456
rel_incor: 228
irrel_cor: 685
irrel_incor: 28
Relevant Accuracy: 0.666667
Irrelevant Accuracy: 0.960729
Precision: 0.942149
Recall: 0.666667
F1 score: 0.780822
Accuracy: 0.81675


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 471717.12it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 360609.41it/s]


# wrong label: 0
# correct label: 8252
Iteration: 3 MAP: 18132.135091085296
rel_cor: 456
rel_incor: 228
irrel_cor: 685
irrel_incor: 28
Relevant Accuracy: 0.666667
Irrelevant Accuracy: 0.960729
Precision: 0.942149
Recall: 0.666667
F1 score: 0.780822
Accuracy: 0.81675


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 448695.12it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 359765.87it/s]


# wrong label: 0
# correct label: 8252
Iteration: 4 MAP: 18132.135091085296
rel_cor: 456
rel_incor: 228
irrel_cor: 685
irrel_incor: 28
Relevant Accuracy: 0.666667
Irrelevant Accuracy: 0.960729
Precision: 0.942149
Recall: 0.666667
F1 score: 0.780822
Accuracy: 0.81675


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 460057.63it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 359058.67it/s]


# wrong label: 0
# correct label: 8252
Iteration: 5 MAP: 18143.884164650284
rel_cor: 443
rel_incor: 241
irrel_cor: 685
irrel_incor: 28
Relevant Accuracy: 0.647661
Irrelevant Accuracy: 0.960729
Precision: 0.940552
Recall: 0.647661
F1 score: 0.7671
Accuracy: 0.807445

3951 4278
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 499345.50it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 380077.92it/s]

# wrong label: 0
# correct label: 8229





Iteration: 1 MAP: 18156.572799005386
rel_cor: 449
rel_incor: 238
irrel_cor: 690
irrel_incor: 32
Relevant Accuracy: 0.653566
Irrelevant Accuracy: 0.955679
Precision: 0.933472
Recall: 0.653566
F1 score: 0.768836
Accuracy: 0.808375


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 466133.62it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 364944.74it/s]


# wrong label: 0
# correct label: 8229
Iteration: 2 MAP: 18152.061753907084
rel_cor: 449
rel_incor: 238
irrel_cor: 690
irrel_incor: 32
Relevant Accuracy: 0.653566
Irrelevant Accuracy: 0.955679
Precision: 0.933472
Recall: 0.653566
F1 score: 0.768836
Accuracy: 0.808375


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 472183.81it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 376867.60it/s]


# wrong label: 0
# correct label: 8229
Iteration: 3 MAP: 18151.96415523517
rel_cor: 449
rel_incor: 238
irrel_cor: 690
irrel_incor: 32
Relevant Accuracy: 0.653566
Irrelevant Accuracy: 0.955679
Precision: 0.933472
Recall: 0.653566
F1 score: 0.768836
Accuracy: 0.808375


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 452257.02it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 352858.02it/s]


# wrong label: 0
# correct label: 8229
Iteration: 4 MAP: 18151.96415523517
rel_cor: 449
rel_incor: 238
irrel_cor: 690
irrel_incor: 32
Relevant Accuracy: 0.653566
Irrelevant Accuracy: 0.955679
Precision: 0.933472
Recall: 0.653566
F1 score: 0.768836
Accuracy: 0.808375


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 460086.24it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 350576.32it/s]


# wrong label: 0
# correct label: 8229
Iteration: 5 MAP: 18153.207828678962
rel_cor: 447
rel_incor: 240
irrel_cor: 690
irrel_incor: 32
Relevant Accuracy: 0.650655
Irrelevant Accuracy: 0.955679
Precision: 0.933194
Recall: 0.650655
F1 score: 0.766724
Accuracy: 0.806955

3939 4286
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 478017.53it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 357251.00it/s]

# wrong label: 0
# correct label: 8225





Iteration: 1 MAP: 18168.5573452114
rel_cor: 457
rel_incor: 237
irrel_cor: 682
irrel_incor: 32
Relevant Accuracy: 0.658501
Irrelevant Accuracy: 0.955182
Precision: 0.93456
Recall: 0.658501
F1 score: 0.772612
Accuracy: 0.808949


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 470368.92it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 350368.77it/s]


# wrong label: 0
# correct label: 8225
Iteration: 2 MAP: 18165.78618872155
rel_cor: 458
rel_incor: 236
irrel_cor: 682
irrel_incor: 32
Relevant Accuracy: 0.659942
Irrelevant Accuracy: 0.955182
Precision: 0.934694
Recall: 0.659942
F1 score: 0.773649
Accuracy: 0.809659


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 460501.52it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 360979.81it/s]


# wrong label: 0
# correct label: 8225
Iteration: 3 MAP: 18165.78618872155
rel_cor: 458
rel_incor: 236
irrel_cor: 682
irrel_incor: 32
Relevant Accuracy: 0.659942
Irrelevant Accuracy: 0.955182
Precision: 0.934694
Recall: 0.659942
F1 score: 0.773649
Accuracy: 0.809659


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 478629.90it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 357392.53it/s]


# wrong label: 0
# correct label: 8225
Iteration: 4 MAP: 18165.78618872155
rel_cor: 458
rel_incor: 236
irrel_cor: 682
irrel_incor: 32
Relevant Accuracy: 0.659942
Irrelevant Accuracy: 0.955182
Precision: 0.934694
Recall: 0.659942
F1 score: 0.773649
Accuracy: 0.809659


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 471602.84it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 356262.59it/s]


# wrong label: 0
# correct label: 8225
Iteration: 5 MAP: 18471.126332223015
rel_cor: 354
rel_incor: 340
irrel_cor: 679
irrel_incor: 35
Relevant Accuracy: 0.510086
Irrelevant Accuracy: 0.95098
Precision: 0.910026
Recall: 0.510086
F1 score: 0.65374
Accuracy: 0.733665

3939 4301
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 496958.79it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 364723.44it/s]


# wrong label: 0
# correct label: 8240
Iteration: 1 MAP: 18127.00116654625
rel_cor: 450
rel_incor: 250
irrel_cor: 670
irrel_incor: 29
Relevant Accuracy: 0.642857
Irrelevant Accuracy: 0.958512
Precision: 0.939457
Recall: 0.642857
F1 score: 0.763359
Accuracy: 0.800572


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 432154.30it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 357042.37it/s]


# wrong label: 0
# correct label: 8240
Iteration: 2 MAP: 18124.1675202488
rel_cor: 450
rel_incor: 250
irrel_cor: 669
irrel_incor: 30
Relevant Accuracy: 0.642857
Irrelevant Accuracy: 0.957082
Precision: 0.9375
Recall: 0.642857
F1 score: 0.762712
Accuracy: 0.799857


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 453787.00it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 360706.99it/s]


# wrong label: 0
# correct label: 8240
Iteration: 3 MAP: 18124.1675202488
rel_cor: 450
rel_incor: 250
irrel_cor: 669
irrel_incor: 30
Relevant Accuracy: 0.642857
Irrelevant Accuracy: 0.957082
Precision: 0.9375
Recall: 0.642857
F1 score: 0.762712
Accuracy: 0.799857


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 465646.56it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 353974.05it/s]


# wrong label: 0
# correct label: 8240
Iteration: 4 MAP: 18124.1675202488
rel_cor: 450
rel_incor: 250
irrel_cor: 669
irrel_incor: 30
Relevant Accuracy: 0.642857
Irrelevant Accuracy: 0.957082
Precision: 0.9375
Recall: 0.642857
F1 score: 0.762712
Accuracy: 0.799857


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 454129.64it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 353740.47it/s]


# wrong label: 0
# correct label: 8240
Iteration: 5 MAP: 18127.62442245474
rel_cor: 443
rel_incor: 257
irrel_cor: 670
irrel_incor: 29
Relevant Accuracy: 0.632857
Irrelevant Accuracy: 0.958512
Precision: 0.938559
Recall: 0.632857
F1 score: 0.755973
Accuracy: 0.795568

3961 4260
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 484998.32it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 367800.67it/s]


# wrong label: 0
# correct label: 8221
Iteration: 1 MAP: 18187.847021899022
rel_cor: 463
rel_incor: 207
irrel_cor: 710
irrel_incor: 30
Relevant Accuracy: 0.691045
Irrelevant Accuracy: 0.959459
Precision: 0.939148
Recall: 0.691045
F1 score: 0.796217
Accuracy: 0.831915


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 454405.80it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 360925.22it/s]


# wrong label: 0
# correct label: 8221
Iteration: 2 MAP: 18184.20845528583
rel_cor: 464
rel_incor: 206
irrel_cor: 710
irrel_incor: 30
Relevant Accuracy: 0.692537
Irrelevant Accuracy: 0.959459
Precision: 0.939271
Recall: 0.692537
F1 score: 0.797251
Accuracy: 0.832624


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 460399.78it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 360875.92it/s]


# wrong label: 0
# correct label: 8221
Iteration: 3 MAP: 18183.7109534357
rel_cor: 464
rel_incor: 206
irrel_cor: 710
irrel_incor: 30
Relevant Accuracy: 0.692537
Irrelevant Accuracy: 0.959459
Precision: 0.939271
Recall: 0.692537
F1 score: 0.797251
Accuracy: 0.832624


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 453668.72it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 350314.02it/s]


# wrong label: 0
# correct label: 8221
Iteration: 4 MAP: 18183.7109534357
rel_cor: 464
rel_incor: 206
irrel_cor: 710
irrel_incor: 30
Relevant Accuracy: 0.692537
Irrelevant Accuracy: 0.959459
Precision: 0.939271
Recall: 0.692537
F1 score: 0.797251
Accuracy: 0.832624


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 447783.91it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 360425.81it/s]


# wrong label: 0
# correct label: 8221
Iteration: 5 MAP: 18185.341203472417
rel_cor: 462
rel_incor: 208
irrel_cor: 710
irrel_incor: 30
Relevant Accuracy: 0.689552
Irrelevant Accuracy: 0.959459
Precision: 0.939024
Recall: 0.689552
F1 score: 0.795181
Accuracy: 0.831206

Done.

Averaged precision: 0.931828
Averaged recall: 0.643635
Averaged F1 score: 0.760007
Averaged accuracy: 0.801745
End: 2024-12-02 22:15:49.532783
{'type_sim': 'rbf', 'type_compat': 'table1', 'compat_threshold1': 0.5, 'compat_threshold2': 0.7, 'N_FOLDS': 5, 'max_epochs': 5}
Reading data ...
Done ...
Starting 5-fold cross-validation
3685 4009
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 571648.53it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 394165.80it/s]

# wrong label: 0
# correct label: 7694





Iteration: 1 MAP: 45764.46879667422
rel_cor: 812
rel_incor: 168
irrel_cor: 957
irrel_incor: 34
Relevant Accuracy: 0.828571
Irrelevant Accuracy: 0.965691
Precision: 0.959811
Recall: 0.828571
F1 score: 0.889376
Accuracy: 0.897514


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 528030.24it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 385424.54it/s]


# wrong label: 0
# correct label: 7694
Iteration: 2 MAP: 45545.6459478772
rel_cor: 849
rel_incor: 131
irrel_cor: 938
irrel_incor: 53
Relevant Accuracy: 0.866327
Irrelevant Accuracy: 0.946519
Precision: 0.941242
Recall: 0.866327
F1 score: 0.902232
Accuracy: 0.906646


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 534039.62it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 394143.75it/s]


# wrong label: 0
# correct label: 7694
Iteration: 3 MAP: 45554.80849093561
rel_cor: 845
rel_incor: 135
irrel_cor: 953
irrel_incor: 38
Relevant Accuracy: 0.862245
Irrelevant Accuracy: 0.961655
Precision: 0.956965
Recall: 0.862245
F1 score: 0.907139
Accuracy: 0.912227


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 551185.13it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 393023.46it/s]


# wrong label: 0
# correct label: 7694
Iteration: 4 MAP: 45516.36620468262
rel_cor: 829
rel_incor: 151
irrel_cor: 952
irrel_incor: 39
Relevant Accuracy: 0.845918
Irrelevant Accuracy: 0.960646
Precision: 0.955069
Recall: 0.845918
F1 score: 0.897186
Accuracy: 0.903602


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 521696.16it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 374666.87it/s]


# wrong label: 0
# correct label: 7694
Iteration: 5 MAP: 56614.40836269612
rel_cor: 190
rel_incor: 790
irrel_cor: 953
irrel_incor: 38
Relevant Accuracy: 0.193878
Irrelevant Accuracy: 0.961655
Precision: 0.833333
Recall: 0.193878
F1 score: 0.31457
Accuracy: 0.579909

3708 3991
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 479091.75it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 385282.02it/s]


# wrong label: 0
# correct label: 7699
Iteration: 1 MAP: 45689.969182065906
rel_cor: 785
rel_incor: 165
irrel_cor: 981
irrel_incor: 28
Relevant Accuracy: 0.826316
Irrelevant Accuracy: 0.97225
Precision: 0.96556
Recall: 0.826316
F1 score: 0.890528
Accuracy: 0.90148


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 495195.69it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 378956.45it/s]


# wrong label: 0
# correct label: 7699
Iteration: 2 MAP: 45453.22047977802
rel_cor: 814
rel_incor: 136
irrel_cor: 960
irrel_incor: 49
Relevant Accuracy: 0.856842
Irrelevant Accuracy: 0.951437
Precision: 0.943221
Recall: 0.856842
F1 score: 0.897959
Accuracy: 0.905564


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 492238.43it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 379364.56it/s]


# wrong label: 0
# correct label: 7699
Iteration: 3 MAP: 45452.474794677466
rel_cor: 793
rel_incor: 157
irrel_cor: 978
irrel_incor: 31
Relevant Accuracy: 0.834737
Irrelevant Accuracy: 0.969277
Precision: 0.962379
Recall: 0.834737
F1 score: 0.894025
Accuracy: 0.904033


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 512688.02it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 381392.76it/s]


# wrong label: 0
# correct label: 7699
Iteration: 4 MAP: 45461.382222703665
rel_cor: 817
rel_incor: 133
irrel_cor: 971
irrel_incor: 38
Relevant Accuracy: 0.86
Irrelevant Accuracy: 0.962339
Precision: 0.955556
Recall: 0.86
F1 score: 0.905263
Accuracy: 0.912711


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 513519.02it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 373614.79it/s]


# wrong label: 0
# correct label: 7699
Iteration: 5 MAP: 54842.54086726541
rel_cor: 253
rel_incor: 697
irrel_cor: 972
irrel_incor: 37
Relevant Accuracy: 0.266316
Irrelevant Accuracy: 0.96333
Precision: 0.872414
Recall: 0.266316
F1 score: 0.408065
Accuracy: 0.625319

3717 3980
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 528862.61it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 384862.09it/s]


# wrong label: 0
# correct label: 7697
Iteration: 1 MAP: 45815.408656152584
rel_cor: 801
rel_incor: 135
irrel_cor: 996
irrel_incor: 24
Relevant Accuracy: 0.855769
Irrelevant Accuracy: 0.976471
Precision: 0.970909
Recall: 0.855769
F1 score: 0.90971
Accuracy: 0.918712


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 485536.29it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 376416.96it/s]


# wrong label: 0
# correct label: 7697
Iteration: 2 MAP: 45573.15099027982
rel_cor: 816
rel_incor: 120
irrel_cor: 980
irrel_incor: 40
Relevant Accuracy: 0.871795
Irrelevant Accuracy: 0.960784
Precision: 0.953271
Recall: 0.871795
F1 score: 0.910714
Accuracy: 0.9182


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 498264.20it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 387306.22it/s]


# wrong label: 0
# correct label: 7697
Iteration: 3 MAP: 45627.074036391896
rel_cor: 827
rel_incor: 109
irrel_cor: 974
irrel_incor: 46
Relevant Accuracy: 0.883547
Irrelevant Accuracy: 0.954902
Precision: 0.947308
Recall: 0.883547
F1 score: 0.914317
Accuracy: 0.920757


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 489355.12it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 364698.26it/s]


# wrong label: 0
# correct label: 7697
Iteration: 4 MAP: 45621.8489933335
rel_cor: 821
rel_incor: 115
irrel_cor: 986
irrel_incor: 34
Relevant Accuracy: 0.877137
Irrelevant Accuracy: 0.966667
Precision: 0.960234
Recall: 0.877137
F1 score: 0.916806
Accuracy: 0.923824


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 496656.84it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 365028.48it/s]


# wrong label: 0
# correct label: 7697
Iteration: 5 MAP: 55688.19880060118
rel_cor: 227
rel_incor: 709
irrel_cor: 992
irrel_incor: 28
Relevant Accuracy: 0.242521
Irrelevant Accuracy: 0.972549
Precision: 0.890196
Recall: 0.242521
F1 score: 0.381192
Accuracy: 0.623211

3712 3980
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 490676.16it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 364562.57it/s]


# wrong label: 0
# correct label: 7692
Iteration: 1 MAP: 45823.88951931851
rel_cor: 782
rel_incor: 159
irrel_cor: 987
irrel_incor: 33
Relevant Accuracy: 0.831031
Irrelevant Accuracy: 0.967647
Precision: 0.959509
Recall: 0.831031
F1 score: 0.890661
Accuracy: 0.902091


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 499515.77it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 364985.26it/s]


# wrong label: 0
# correct label: 7692
Iteration: 2 MAP: 45612.43397234315
rel_cor: 820
rel_incor: 121
irrel_cor: 975
irrel_incor: 45
Relevant Accuracy: 0.871413
Irrelevant Accuracy: 0.955882
Precision: 0.947977
Recall: 0.871413
F1 score: 0.908084
Accuracy: 0.915349


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 495358.17it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 366590.45it/s]


# wrong label: 0
# correct label: 7692
Iteration: 3 MAP: 45640.18253897069
rel_cor: 821
rel_incor: 120
irrel_cor: 971
irrel_incor: 49
Relevant Accuracy: 0.872476
Irrelevant Accuracy: 0.951961
Precision: 0.943678
Recall: 0.872476
F1 score: 0.906681
Accuracy: 0.913819


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 481500.64it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 369733.59it/s]


# wrong label: 0
# correct label: 7692
Iteration: 4 MAP: 45704.551294668934
rel_cor: 794
rel_incor: 147
irrel_cor: 987
irrel_incor: 33
Relevant Accuracy: 0.843783
Irrelevant Accuracy: 0.967647
Precision: 0.960097
Recall: 0.843783
F1 score: 0.89819
Accuracy: 0.90821


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 468815.88it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 364575.15it/s]


# wrong label: 0
# correct label: 7692
Iteration: 5 MAP: 56830.2466530703
rel_cor: 146
rel_incor: 795
irrel_cor: 1007
irrel_incor: 13
Relevant Accuracy: 0.155154
Irrelevant Accuracy: 0.987255
Precision: 0.918239
Recall: 0.155154
F1 score: 0.265455
Accuracy: 0.587965

3652 4040
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 504214.13it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 372426.06it/s]

# wrong label: 0
# correct label: 7692





Iteration: 1 MAP: 45798.492231473094
rel_cor: 843
rel_incor: 153
irrel_cor: 930
irrel_incor: 30
Relevant Accuracy: 0.846386
Irrelevant Accuracy: 0.96875
Precision: 0.965636
Recall: 0.846386
F1 score: 0.902087
Accuracy: 0.906442


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 472257.66it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 360945.47it/s]


# wrong label: 0
# correct label: 7692
Iteration: 2 MAP: 45546.5081996764
rel_cor: 867
rel_incor: 129
irrel_cor: 913
irrel_incor: 47
Relevant Accuracy: 0.870482
Irrelevant Accuracy: 0.951042
Precision: 0.948578
Recall: 0.870482
F1 score: 0.907853
Accuracy: 0.91002


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 492210.59it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 364901.54it/s]


# wrong label: 0
# correct label: 7692
Iteration: 3 MAP: 45501.44720767386
rel_cor: 865
rel_incor: 131
irrel_cor: 919
irrel_incor: 41
Relevant Accuracy: 0.868474
Irrelevant Accuracy: 0.957292
Precision: 0.954746
Recall: 0.868474
F1 score: 0.909569
Accuracy: 0.912065


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 485155.77it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 353555.35it/s]


# wrong label: 0
# correct label: 7692
Iteration: 4 MAP: 45484.59477876074
rel_cor: 850
rel_incor: 146
irrel_cor: 917
irrel_incor: 43
Relevant Accuracy: 0.853414
Irrelevant Accuracy: 0.955208
Precision: 0.951848
Recall: 0.853414
F1 score: 0.899947
Accuracy: 0.903374


Propagate from vertices with label: 100%|██████████| 35275/35275 [00:00<00:00, 478487.49it/s]
Propagate from vertices without label: 100%|██████████| 35275/35275 [00:00<00:00, 360757.13it/s]


# wrong label: 0
# correct label: 7692
Iteration: 5 MAP: 57264.95536338337
rel_cor: 186
rel_incor: 810
irrel_cor: 926
irrel_incor: 34
Relevant Accuracy: 0.186747
Irrelevant Accuracy: 0.964583
Precision: 0.845455
Recall: 0.186747
F1 score: 0.305921
Accuracy: 0.568507

Done.

Averaged precision: 0.871927
Averaged recall: 0.208923
Averaged F1 score: 0.33504
Averaged accuracy: 0.596982
End: 2024-12-02 22:16:55.350937
Best Score: 0.9166001703130268
Best config: {'type_sim': 'cos', 'type_compat': 'table3', 'compat_threshold1': 0.7, 'compat_threshold2': 0.5, 'N_FOLDS': 5, 'max_epochs': 5}
Best Average performance: (0.9349725840833679, 0.8974001961987661, 0.9152671083124616, 0.9187607926575115)


### Testing for different evasion conditions including:
- M1: Change domain to a random benign domain (IP changes).
- M2: Change path string to a random benign one.
- M3: Change query string to a random benign one.
- M4: Change domain and path to random benign ones.
- M5: Change domain and query string to random benign ones.
- M6: Change path and query string to random benign ones.
- M7: Change all parts (domain, path, query) to random benign ones.

In [80]:
 ## Now using the best performing graph, test evasions 
# containing evasion instances m1, m2, ... m7
test_set = [
    {"url": "015fb31.netsolhost.com/n/3.0.26/11433792/AutomationStudio6.0.exe", "truth": "1"}, #M1 URL -> benign
    {"url": "gulsproductionscar.com/2_1/sessions/result.cfm?page=44&audienceid=&trackid=&pagesize=25&q=&handouts=&date=&speakerid=&sessiontype=", "truth": "1"}, #M2 Path stgin -> benign
    {"url": "download.grandcloud.cn/9291/15474/setup_2949-14598.exe?personID=I4920&tree=ncshawfamily", "truth": "1"}, #M3 Query string -> beniegn
    {"url": "1146miles.com/music/code-pie-baby-justin-bieber-cover?low_id=2000&870470=33440/", "truth": "1"}, #M4 domain, path -> benign
    {"url": "032255hellooo.com/css/ballet-guitars-notfree-mp3?personID=I4920&tree=ncshawfamily", "truth": "1"}, #M5 domain and query -> benign
    {"url": "mobatory.com/5uxfljc5z4s6eacz305ic45h25hq3srib5tc1b51g?j8c=49ljg&pjz9ci=rapid8.com?Screen=CTGY&Store_Code=BC&Category_Code=CP", "truth": "1"}, # M6 path n Query String -> beneign
    {"url": "01453car.com/", "truth": "0"},
    {"url": "015fb31.netsolhost.com/bosstweed/notphish/good.html", "truth": "0"},
    {"url": "02bee66.netsolhosttrustme.com/lincolnhomepage/", "truth": "0"},
    {"url": "02ec0a3.netsolhosttest.com/getperson.php?personID=I4920&tree=ncshawfamily", "truth": "0"},
    {"url": "032255hellooo.com/", "truth": "0"}
]

best_model.test_evasion(best_model.g, test_set)
best_model.main() # rerun belief propogation
count = 0

for row in test_set:
    if not row["url"].startswith(("http://", "https://")): row["url"] = "http://" + row["url"]

    if row["url"] in best_model.g:
        model_prediction = best_model.g.nodes[row["url"]]["best_label"]
        print(f"Prediction  {model_prediction} \n")
        if model_prediction == int(row["truth"]): count += 1

print( count / len(test_set) * 100 )

Computing transition probabilities: 100%|██████████| 35327/35327 [02:04<00:00, 283.95it/s]


Graph complete.
Test nodes added to Graph
Reading data ...
Done ...
Starting 5-fold cross-validation
3724 3960
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 527216.82it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 67944.70it/s]


# wrong label: 0
# correct label: 7684
Iteration: 1 MAP: 18600.293811645046
rel_cor: 825
rel_incor: 91
irrel_cor: 951
irrel_incor: 89
Relevant Accuracy: 0.900655
Irrelevant Accuracy: 0.914423
Precision: 0.902626
Recall: 0.900655
F1 score: 0.901639
Accuracy: 0.907975


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 455659.22it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69409.32it/s]


# wrong label: 0
# correct label: 7684
Iteration: 2 MAP: 18595.87535002424
rel_cor: 829
rel_incor: 87
irrel_cor: 953
irrel_incor: 87
Relevant Accuracy: 0.905022
Irrelevant Accuracy: 0.916346
Precision: 0.905022
Recall: 0.905022
F1 score: 0.905022
Accuracy: 0.911043


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 462562.48it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69433.00it/s]


# wrong label: 0
# correct label: 7684
Iteration: 3 MAP: 18594.44972791864
rel_cor: 829
rel_incor: 87
irrel_cor: 953
irrel_incor: 87
Relevant Accuracy: 0.905022
Irrelevant Accuracy: 0.916346
Precision: 0.905022
Recall: 0.905022
F1 score: 0.905022
Accuracy: 0.911043


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 479740.52it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70707.27it/s]


# wrong label: 0
# correct label: 7684
Iteration: 4 MAP: 18593.30683913423
rel_cor: 829
rel_incor: 87
irrel_cor: 953
irrel_incor: 87
Relevant Accuracy: 0.905022
Irrelevant Accuracy: 0.916346
Precision: 0.905022
Recall: 0.905022
F1 score: 0.905022
Accuracy: 0.911043


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 503700.88it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69802.88it/s]


# wrong label: 0
# correct label: 7684
Iteration: 5 MAP: 18660.488351668788
rel_cor: 813
rel_incor: 103
irrel_cor: 963
irrel_incor: 77
Relevant Accuracy: 0.887555
Irrelevant Accuracy: 0.925962
Precision: 0.913483
Recall: 0.887555
F1 score: 0.900332
Accuracy: 0.907975

3687 4018
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 485326.68it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 71180.23it/s]


# wrong label: 0
# correct label: 7705
Iteration: 1 MAP: 18586.732778775186
rel_cor: 897
rel_incor: 79
irrel_cor: 913
irrel_incor: 69
Relevant Accuracy: 0.919057
Irrelevant Accuracy: 0.929735
Precision: 0.928571
Recall: 0.919057
F1 score: 0.92379
Accuracy: 0.924413


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 448545.82it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 71573.95it/s]


# wrong label: 0
# correct label: 7705
Iteration: 2 MAP: 18569.626544582352
rel_cor: 903
rel_incor: 73
irrel_cor: 915
irrel_incor: 67
Relevant Accuracy: 0.925205
Irrelevant Accuracy: 0.931772
Precision: 0.930928
Recall: 0.925205
F1 score: 0.928058
Accuracy: 0.928498


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 450875.68it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 71162.83it/s]


# wrong label: 0
# correct label: 7705
Iteration: 3 MAP: 18567.66490940999
rel_cor: 901
rel_incor: 75
irrel_cor: 915
irrel_incor: 67
Relevant Accuracy: 0.923156
Irrelevant Accuracy: 0.931772
Precision: 0.930785
Recall: 0.923156
F1 score: 0.926955
Accuracy: 0.927477


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 448210.68it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70358.09it/s]


# wrong label: 0
# correct label: 7705
Iteration: 4 MAP: 18567.250353860367
rel_cor: 901
rel_incor: 75
irrel_cor: 915
irrel_incor: 67
Relevant Accuracy: 0.923156
Irrelevant Accuracy: 0.931772
Precision: 0.930785
Recall: 0.923156
F1 score: 0.926955
Accuracy: 0.927477


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 432841.92it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70875.87it/s]


# wrong label: 0
# correct label: 7705
Iteration: 5 MAP: 18560.16659610223
rel_cor: 899
rel_incor: 77
irrel_cor: 916
irrel_incor: 66
Relevant Accuracy: 0.921107
Irrelevant Accuracy: 0.93279
Precision: 0.931606
Recall: 0.921107
F1 score: 0.926327
Accuracy: 0.926966

3659 4022
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 449944.66it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 61824.01it/s]


# wrong label: 0
# correct label: 7681
Iteration: 1 MAP: 18593.397308406333
rel_cor: 910
rel_incor: 79
irrel_cor: 924
irrel_incor: 54
Relevant Accuracy: 0.920121
Irrelevant Accuracy: 0.944785
Precision: 0.943983
Recall: 0.920121
F1 score: 0.9319
Accuracy: 0.932384


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 437700.66it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 68252.32it/s]


# wrong label: 0
# correct label: 7681
Iteration: 2 MAP: 18583.607648071757
rel_cor: 915
rel_incor: 74
irrel_cor: 924
irrel_incor: 54
Relevant Accuracy: 0.925177
Irrelevant Accuracy: 0.944785
Precision: 0.944272
Recall: 0.925177
F1 score: 0.934627
Accuracy: 0.934926


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 433269.72it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 68726.84it/s]


# wrong label: 0
# correct label: 7681
Iteration: 3 MAP: 18580.642349955553
rel_cor: 915
rel_incor: 74
irrel_cor: 925
irrel_incor: 53
Relevant Accuracy: 0.925177
Irrelevant Accuracy: 0.945808
Precision: 0.945248
Recall: 0.925177
F1 score: 0.935105
Accuracy: 0.935435


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 426388.62it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 65974.40it/s]


# wrong label: 0
# correct label: 7681
Iteration: 4 MAP: 18580.642349955553
rel_cor: 915
rel_incor: 74
irrel_cor: 925
irrel_incor: 53
Relevant Accuracy: 0.925177
Irrelevant Accuracy: 0.945808
Precision: 0.945248
Recall: 0.925177
F1 score: 0.935105
Accuracy: 0.935435


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 426511.36it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 67099.70it/s]


# wrong label: 0
# correct label: 7681
Iteration: 5 MAP: 18663.534048971575
rel_cor: 856
rel_incor: 133
irrel_cor: 915
irrel_incor: 63
Relevant Accuracy: 0.865521
Irrelevant Accuracy: 0.935583
Precision: 0.931447
Recall: 0.865521
F1 score: 0.897275
Accuracy: 0.900356

3681 4009
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 469272.04it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69542.01it/s]


# wrong label: 0
# correct label: 7690
Iteration: 1 MAP: 18623.79265483565
rel_cor: 912
rel_incor: 58
irrel_cor: 915
irrel_incor: 76
Relevant Accuracy: 0.940206
Irrelevant Accuracy: 0.92331
Precision: 0.923077
Recall: 0.940206
F1 score: 0.931563
Accuracy: 0.931668


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 438524.53it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69721.49it/s]


# wrong label: 0
# correct label: 7690
Iteration: 2 MAP: 18608.84239747474
rel_cor: 911
rel_incor: 59
irrel_cor: 914
irrel_incor: 77
Relevant Accuracy: 0.939175
Irrelevant Accuracy: 0.922301
Precision: 0.922065
Recall: 0.939175
F1 score: 0.930541
Accuracy: 0.930648


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 415657.05it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69882.68it/s]


# wrong label: 0
# correct label: 7690
Iteration: 3 MAP: 18607.980275058242
rel_cor: 911
rel_incor: 59
irrel_cor: 914
irrel_incor: 77
Relevant Accuracy: 0.939175
Irrelevant Accuracy: 0.922301
Precision: 0.922065
Recall: 0.939175
F1 score: 0.930541
Accuracy: 0.930648


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 422836.84it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 68995.37it/s]


# wrong label: 0
# correct label: 7690
Iteration: 4 MAP: 18607.980275058242
rel_cor: 911
rel_incor: 59
irrel_cor: 914
irrel_incor: 77
Relevant Accuracy: 0.939175
Irrelevant Accuracy: 0.922301
Precision: 0.922065
Recall: 0.939175
F1 score: 0.930541
Accuracy: 0.930648


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 432171.55it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69594.00it/s]


# wrong label: 0
# correct label: 7690
Iteration: 5 MAP: 18583.73189099978
rel_cor: 902
rel_incor: 68
irrel_cor: 932
irrel_incor: 59
Relevant Accuracy: 0.929897
Irrelevant Accuracy: 0.940464
Precision: 0.938606
Recall: 0.929897
F1 score: 0.934231
Accuracy: 0.935237

3716 3991
embedding done
Done.


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 473738.63it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 71093.82it/s]


# wrong label: 0
# correct label: 7707
Iteration: 1 MAP: 18597.964237935044
rel_cor: 865
rel_incor: 78
irrel_cor: 941
irrel_incor: 68
Relevant Accuracy: 0.917285
Irrelevant Accuracy: 0.932607
Precision: 0.927117
Recall: 0.917285
F1 score: 0.922175
Accuracy: 0.925205


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 425676.92it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69092.37it/s]


# wrong label: 0
# correct label: 7707
Iteration: 2 MAP: 18589.43812680911
rel_cor: 869
rel_incor: 74
irrel_cor: 942
irrel_incor: 67
Relevant Accuracy: 0.921527
Irrelevant Accuracy: 0.933598
Precision: 0.928419
Recall: 0.921527
F1 score: 0.92496
Accuracy: 0.927766


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 438974.05it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70073.70it/s]


# wrong label: 0
# correct label: 7707
Iteration: 3 MAP: 18587.34422040652
rel_cor: 869
rel_incor: 74
irrel_cor: 941
irrel_incor: 68
Relevant Accuracy: 0.921527
Irrelevant Accuracy: 0.932607
Precision: 0.927428
Recall: 0.921527
F1 score: 0.924468
Accuracy: 0.927254


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 422324.64it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70240.49it/s]


# wrong label: 0
# correct label: 7707
Iteration: 4 MAP: 18587.070633298863
rel_cor: 869
rel_incor: 74
irrel_cor: 941
irrel_incor: 68
Relevant Accuracy: 0.921527
Irrelevant Accuracy: 0.932607
Precision: 0.927428
Recall: 0.921527
F1 score: 0.924468
Accuracy: 0.927254


Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 417359.38it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70340.66it/s]

# wrong label: 0
# correct label: 7707
Iteration: 5 MAP: 18599.812279648286
rel_cor: 841
rel_incor: 102
irrel_cor: 954
irrel_incor: 55
Relevant Accuracy: 0.891835
Irrelevant Accuracy: 0.945491
Precision: 0.938616
Recall: 0.891835
F1 score: 0.914628
Accuracy: 0.91957

Done.

Averaged precision: 0.930752
Averaged recall: 0.899183
Averaged F1 score: 0.914558
Averaged accuracy: 0.918021
End: 2024-12-02 22:40:21.505236
Prediction  1 

Prediction  1 

Prediction  1 

Prediction  0 

Prediction  1 

Prediction  1 

Prediction  0 

Prediction  1 

Prediction  0 

Prediction  1 

Prediction  0 

72.72727272727273





Computing transition probabilities: 100%|██████████| 35327/35327 [02:04<00:00, 283.95it/s]
Graph complete.
Test nodes added to Graph
Reading data ...
Done ...
Starting 5-fold cross-validation
3724 3960
embedding done
Done.
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 527216.82it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 67944.70it/s]
# wrong label: 0
# correct label: 7684
Iteration: 1 MAP: 18600.293811645046
rel_cor: 825
rel_incor: 91
irrel_cor: 951
irrel_incor: 89
Relevant Accuracy: 0.900655
Irrelevant Accuracy: 0.914423
Precision: 0.902626
Recall: 0.900655
F1 score: 0.901639
Accuracy: 0.907975
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 455659.22it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69409.32it/s]
# wrong label: 0
# correct label: 7684
Iteration: 2 MAP: 18595.87535002424
rel_cor: 829
rel_incor: 87
irrel_cor: 953
irrel_incor: 87
Relevant Accuracy: 0.905022
Irrelevant Accuracy: 0.916346
Precision: 0.905022
Recall: 0.905022
F1 score: 0.905022
Accuracy: 0.911043
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 462562.48it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69433.00it/s]
# wrong label: 0
# correct label: 7684
Iteration: 3 MAP: 18594.44972791864
rel_cor: 829
rel_incor: 87
irrel_cor: 953
irrel_incor: 87
Relevant Accuracy: 0.905022
Irrelevant Accuracy: 0.916346
Precision: 0.905022
Recall: 0.905022
F1 score: 0.905022
Accuracy: 0.911043
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 479740.52it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70707.27it/s]
# wrong label: 0
# correct label: 7684
Iteration: 4 MAP: 18593.30683913423
rel_cor: 829
rel_incor: 87
irrel_cor: 953
irrel_incor: 87
Relevant Accuracy: 0.905022
Irrelevant Accuracy: 0.916346
Precision: 0.905022
Recall: 0.905022
F1 score: 0.905022
Accuracy: 0.911043
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 503700.88it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69802.88it/s]
# wrong label: 0
# correct label: 7684
Iteration: 5 MAP: 18660.488351668788
rel_cor: 813
rel_incor: 103
irrel_cor: 963
irrel_incor: 77
Relevant Accuracy: 0.887555
Irrelevant Accuracy: 0.925962
Precision: 0.913483
Recall: 0.887555
F1 score: 0.900332
Accuracy: 0.907975

3687 4018
embedding done
Done.
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 485326.68it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 71180.23it/s]
# wrong label: 0
# correct label: 7705
Iteration: 1 MAP: 18586.732778775186
rel_cor: 897
rel_incor: 79
irrel_cor: 913
irrel_incor: 69
Relevant Accuracy: 0.919057
Irrelevant Accuracy: 0.929735
Precision: 0.928571
Recall: 0.919057
F1 score: 0.92379
Accuracy: 0.924413
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 448545.82it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 71573.95it/s]
# wrong label: 0
# correct label: 7705
Iteration: 2 MAP: 18569.626544582352
rel_cor: 903
rel_incor: 73
irrel_cor: 915
irrel_incor: 67
Relevant Accuracy: 0.925205
Irrelevant Accuracy: 0.931772
Precision: 0.930928
Recall: 0.925205
F1 score: 0.928058
Accuracy: 0.928498
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 450875.68it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 71162.83it/s]
# wrong label: 0
# correct label: 7705
Iteration: 3 MAP: 18567.66490940999
rel_cor: 901
rel_incor: 75
irrel_cor: 915
irrel_incor: 67
Relevant Accuracy: 0.923156
Irrelevant Accuracy: 0.931772
Precision: 0.930785
Recall: 0.923156
F1 score: 0.926955
Accuracy: 0.927477
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 448210.68it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70358.09it/s]
# wrong label: 0
# correct label: 7705
Iteration: 4 MAP: 18567.250353860367
rel_cor: 901
rel_incor: 75
irrel_cor: 915
irrel_incor: 67
Relevant Accuracy: 0.923156
Irrelevant Accuracy: 0.931772
Precision: 0.930785
Recall: 0.923156
F1 score: 0.926955
Accuracy: 0.927477
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 432841.92it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70875.87it/s]
# wrong label: 0
# correct label: 7705
Iteration: 5 MAP: 18560.16659610223
rel_cor: 899
rel_incor: 77
irrel_cor: 916
irrel_incor: 66
Relevant Accuracy: 0.921107
Irrelevant Accuracy: 0.93279
Precision: 0.931606
Recall: 0.921107
F1 score: 0.926327
Accuracy: 0.926966

3659 4022
embedding done
Done.
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 449944.66it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 61824.01it/s]
# wrong label: 0
# correct label: 7681
Iteration: 1 MAP: 18593.397308406333
rel_cor: 910
rel_incor: 79
irrel_cor: 924
irrel_incor: 54
Relevant Accuracy: 0.920121
Irrelevant Accuracy: 0.944785
Precision: 0.943983
Recall: 0.920121
F1 score: 0.9319
Accuracy: 0.932384
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 437700.66it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 68252.32it/s]
# wrong label: 0
# correct label: 7681
Iteration: 2 MAP: 18583.607648071757
rel_cor: 915
rel_incor: 74
irrel_cor: 924
irrel_incor: 54
Relevant Accuracy: 0.925177
Irrelevant Accuracy: 0.944785
Precision: 0.944272
Recall: 0.925177
F1 score: 0.934627
Accuracy: 0.934926
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 433269.72it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 68726.84it/s]
# wrong label: 0
# correct label: 7681
Iteration: 3 MAP: 18580.642349955553
rel_cor: 915
rel_incor: 74
irrel_cor: 925
irrel_incor: 53
Relevant Accuracy: 0.925177
Irrelevant Accuracy: 0.945808
Precision: 0.945248
Recall: 0.925177
F1 score: 0.935105
Accuracy: 0.935435
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 426388.62it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 65974.40it/s]
# wrong label: 0
# correct label: 7681
Iteration: 4 MAP: 18580.642349955553
rel_cor: 915
rel_incor: 74
irrel_cor: 925
irrel_incor: 53
Relevant Accuracy: 0.925177
Irrelevant Accuracy: 0.945808
Precision: 0.945248
Recall: 0.925177
F1 score: 0.935105
Accuracy: 0.935435
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 426511.36it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 67099.70it/s]
# wrong label: 0
# correct label: 7681
Iteration: 5 MAP: 18663.534048971575
rel_cor: 856
rel_incor: 133
irrel_cor: 915
irrel_incor: 63
Relevant Accuracy: 0.865521
Irrelevant Accuracy: 0.935583
Precision: 0.931447
Recall: 0.865521
F1 score: 0.897275
Accuracy: 0.900356

3681 4009
embedding done
Done.
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 469272.04it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69542.01it/s]
# wrong label: 0
# correct label: 7690
Iteration: 1 MAP: 18623.79265483565
rel_cor: 912
rel_incor: 58
irrel_cor: 915
irrel_incor: 76
Relevant Accuracy: 0.940206
Irrelevant Accuracy: 0.92331
Precision: 0.923077
Recall: 0.940206
F1 score: 0.931563
Accuracy: 0.931668
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 438524.53it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69721.49it/s]
# wrong label: 0
# correct label: 7690
Iteration: 2 MAP: 18608.84239747474
rel_cor: 911
rel_incor: 59
irrel_cor: 914
irrel_incor: 77
Relevant Accuracy: 0.939175
Irrelevant Accuracy: 0.922301
Precision: 0.922065
Recall: 0.939175
F1 score: 0.930541
Accuracy: 0.930648
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 415657.05it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69882.68it/s]
# wrong label: 0
# correct label: 7690
Iteration: 3 MAP: 18607.980275058242
rel_cor: 911
rel_incor: 59
irrel_cor: 914
irrel_incor: 77
Relevant Accuracy: 0.939175
Irrelevant Accuracy: 0.922301
Precision: 0.922065
Recall: 0.939175
F1 score: 0.930541
Accuracy: 0.930648
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 422836.84it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 68995.37it/s]
# wrong label: 0
# correct label: 7690
Iteration: 4 MAP: 18607.980275058242
rel_cor: 911
rel_incor: 59
irrel_cor: 914
irrel_incor: 77
Relevant Accuracy: 0.939175
Irrelevant Accuracy: 0.922301
Precision: 0.922065
Recall: 0.939175
F1 score: 0.930541
Accuracy: 0.930648
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 432171.55it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69594.00it/s]
# wrong label: 0
# correct label: 7690
Iteration: 5 MAP: 18583.73189099978
rel_cor: 902
rel_incor: 68
irrel_cor: 932
irrel_incor: 59
Relevant Accuracy: 0.929897
Irrelevant Accuracy: 0.940464
Precision: 0.938606
Recall: 0.929897
F1 score: 0.934231
Accuracy: 0.935237

3716 3991
embedding done
Done.
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 473738.63it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 71093.82it/s]
# wrong label: 0
# correct label: 7707
Iteration: 1 MAP: 18597.964237935044
rel_cor: 865
rel_incor: 78
irrel_cor: 941
irrel_incor: 68
Relevant Accuracy: 0.917285
Irrelevant Accuracy: 0.932607
Precision: 0.927117
Recall: 0.917285
F1 score: 0.922175
Accuracy: 0.925205
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 425676.92it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 69092.37it/s]
# wrong label: 0
# correct label: 7707
Iteration: 2 MAP: 18589.43812680911
rel_cor: 869
rel_incor: 74
irrel_cor: 942
irrel_incor: 67
Relevant Accuracy: 0.921527
Irrelevant Accuracy: 0.933598
Precision: 0.928419
Recall: 0.921527
F1 score: 0.92496
Accuracy: 0.927766
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 438974.05it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70073.70it/s]
# wrong label: 0
# correct label: 7707
Iteration: 3 MAP: 18587.34422040652
rel_cor: 869
rel_incor: 74
irrel_cor: 941
irrel_incor: 68
Relevant Accuracy: 0.921527
Irrelevant Accuracy: 0.932607
Precision: 0.927428
Recall: 0.921527
F1 score: 0.924468
Accuracy: 0.927254
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 422324.64it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70240.49it/s]
# wrong label: 0
# correct label: 7707
Iteration: 4 MAP: 18587.070633298863
rel_cor: 869
rel_incor: 74
irrel_cor: 941
irrel_incor: 68
Relevant Accuracy: 0.921527
Irrelevant Accuracy: 0.932607
Precision: 0.927428
Recall: 0.921527
F1 score: 0.924468
Accuracy: 0.927254
Propagate from vertices with label: 100%|██████████| 35327/35327 [00:00<00:00, 417359.38it/s]
Propagate from vertices without label: 100%|██████████| 35327/35327 [00:00<00:00, 70340.66it/s]# wrong label: 0
# correct label: 7707
Iteration: 5 MAP: 18599.812279648286
rel_cor: 841
rel_incor: 102
irrel_cor: 954
irrel_incor: 55
Relevant Accuracy: 0.891835
Irrelevant Accuracy: 0.945491
Precision: 0.938616
Recall: 0.891835
F1 score: 0.914628
Accuracy: 0.91957

Done.

Averaged precision: 0.930752
Averaged recall: 0.899183
Averaged F1 score: 0.914558
Averaged accuracy: 0.918021
End: 2024-12-02 22:40:21.505236
Prediction  1 

Prediction  1 

Prediction  1 

Prediction  0 

Prediction  1 

Prediction  1 

Prediction  0 

Prediction  1 

Prediction  0 

Prediction  1 

Prediction  0 

72.72727272727273

