### `import` Packages

In [1]:
import os
import random
import time

import numpy as np
import pandas as pd
import networkx as nx

from matplotlib import pyplot as plt

### Random Training Data

In [2]:
# Seed a dedicated generator so the sampled edge list -- and every output
# derived from it -- is identical after Restart Kernel -> Run All.
# NOTE(review): the example node ids queried further down (e.g. 123, 154) must be
# present in the sampled edges; re-check them whenever SEED is changed.
SEED = 42
rng = np.random.default_rng(SEED)

# 1000 random directed edges between node ids in [1, 999] (`high` is exclusive).
train_array = rng.integers(low=1, high=1000, size=(1000, 2))
train_df = pd.DataFrame(data=train_array, columns=['source_nodes', 'destination_nodes'])

In [3]:
train_df.head()

Unnamed: 0,source_nodes,destination_nodes
0,912,792
1,607,258
2,340,950
3,642,819
4,309,614


In [4]:
train_df.shape

(1000, 2)

### Graph from DataFrame

In [5]:
# Each DataFrame row becomes one directed edge: source_nodes -> destination_nodes.
train_graph = nx.from_pandas_edgelist(
    train_df,
    source='source_nodes',
    target='destination_nodes',
    create_using=nx.DiGraph(),
)

In [6]:
# `nx.info` was deprecated in NetworkX 2.7 and removed in 3.0, so the same
# summary is assembled from the graph's own accessors instead.
node_count = train_graph.number_of_nodes()
edge_count = train_graph.number_of_edges()
# In a directed graph every edge adds one in-degree and one out-degree, so both
# averages equal edges / nodes; guard against an empty graph.
avg_degree = edge_count / node_count if node_count else 0.0
print('\n'.join([
    'Name: {}'.format(train_graph.name),
    'Type: {}'.format(type(train_graph).__name__),
    'Number of nodes: {}'.format(node_count),
    'Number of edges: {}'.format(edge_count),
    'Average in degree: {:8.4f}'.format(avg_degree),
    'Average out degree: {:8.4f}'.format(avg_degree),
]))

Name: 
Type: DiGraph
Number of nodes: 871
Number of edges: 998
Average in degree:   1.1458
Average out degree:   1.1458


In [7]:
set(train_graph.successors(n=123))

{588, 787, 913}

In [8]:
train_df[train_df['source_nodes'] == 123]

Unnamed: 0,source_nodes,destination_nodes
334,123,913
432,123,787
692,123,588


In [9]:
set(train_graph.predecessors(n=123))

set()

In [10]:
train_df[train_df['destination_nodes'] == 123]

Unnamed: 0,source_nodes,destination_nodes


### Graph Distances

* Jaccard Index (similarity) - The larger the value, the more neighbours the two nodes share and the higher the probability of a link between them.

\begin{equation}
j = \frac{|X\cap Y|}{|X \cup Y|} 
\end{equation}

* Cosine Similarity (Otsuka-Ochiai Coefficient) - The larger the value, the higher the probability of a link between the two nodes.

\begin{equation}
c = \frac{|X\cap Y|}{\sqrt{|X|\cdot|Y|}} 
\end{equation}

* Adamic/Adar Index - Defined as the sum, over the neighbours common to the two vertices, of the inverse logarithm of each common neighbour's degree.

$$A(x,y)=\sum_{u \in N(x) \cap N(y)}\frac{1}{\log(|N(u)|)}$$

In [11]:
class GraphDistances:
    """Pairwise link-prediction features for an ordered node pair of a directed graph.

    ``Graph`` is used through the NetworkX ``DiGraph`` API (``has_node``,
    ``has_edge``, ``successors``, ``predecessors``, ``get_edge_data``,
    ``remove_edge``, ``add_edges_from``). "Followees" of a node are its
    successors (out-neighbours); "followers" are its predecessors
    (in-neighbours).
    """

    def __init__(self, Graph, node_1, node_2):
        """Store the graph and the ordered pair ``(node_1, node_2)`` to compare."""
        self.Graph = Graph
        self.node_1 = node_1
        self.node_2 = node_2

    # ------------------------------------------------------------------ helpers
    def _both_nodes_present(self):
        """Return True when both nodes of the pair exist in the graph."""
        return self.Graph.has_node(self.node_1) and self.Graph.has_node(self.node_2)

    def _neighbor_sets(self, neighbors_of):
        """Return ``(set_1, set_2)`` of neighbours for the pair, or None.

        ``neighbors_of`` is ``Graph.successors`` or ``Graph.predecessors``.
        None is returned when either node is missing from the graph or has no
        neighbours in that direction, i.e. when no similarity can be computed.
        """
        if not self._both_nodes_present():
            return None
        first = set(neighbors_of(self.node_1))
        second = set(neighbors_of(self.node_2))
        if not first or not second:
            return None
        return first, second

    def _jaccard(self, neighbors_of):
        """Jaccard index |X & Y| / |X | Y| of the two neighbour sets (0 if undefined)."""
        sets = self._neighbor_sets(neighbors_of)
        if sets is None:
            return 0
        first, second = sets
        return len(first & second) / len(first | second)

    def _cosine(self, neighbors_of):
        """Otsuka-Ochiai coefficient |X & Y| / sqrt(|X| * |Y|) (0 if undefined)."""
        sets = self._neighbor_sets(neighbors_of)
        if sets is None:
            return 0
        first, second = sets
        # Square ROOT of the size product; the previous ``** 2`` contradicted the
        # documented formula and shrank every score.
        return len(first & second) / (len(first) * len(second)) ** 0.5

    # ----------------------------------------------------------------- features
    def follows_back(self):
        """Return 1 if the reverse edge ``node_2 -> node_1`` exists, else 0."""
        return 1 if self.Graph.has_edge(self.node_2, self.node_1) else 0

    def jaccard_followees(self):
        """Jaccard index of the two nodes' successor sets.

        Returns 0 when either node is absent or has no successors.
        """
        return self._jaccard(self.Graph.successors)

    def jaccard_followers(self):
        """Jaccard index of the two nodes' predecessor sets.

        Returns 0 when either node is absent or has no predecessors.
        """
        return self._jaccard(self.Graph.predecessors)

    def cosine_followees(self):
        """Cosine (Otsuka-Ochiai) similarity of the two nodes' successor sets.

        Returns 0 when either node is absent or has no successors.
        """
        return self._cosine(self.Graph.successors)

    def cosine_followers(self):
        """Cosine (Otsuka-Ochiai) similarity of the two nodes' predecessor sets.

        Returns 0 when either node is absent or has no predecessors.
        """
        return self._cosine(self.Graph.predecessors)

    # Backward-compatible alias for the original (misspelled) method name.
    cosing_followers = cosine_followers

    def shortest_path_length(self):
        """Length of the shortest directed path ``node_1 -> node_2``, ignoring the direct edge.

        A direct edge ``node_1 -> node_2`` is removed for the duration of the
        search and always put back (with its attributes), including when no
        alternative path exists.

        Returns:
            The path length, or -1 when there is no such path or a node is
            missing from the graph.
        """
        u, v = self.node_1, self.node_2
        saved_attrs = None
        if self.Graph.has_edge(u, v):
            # Copy the attribute dict so it survives the removal.
            saved_attrs = dict(self.Graph.get_edge_data(u, v))
            self.Graph.remove_edge(u, v)
        try:
            return nx.shortest_path_length(G=self.Graph, source=u, target=v)
        except nx.NetworkXException:
            # Covers "no path" and "node not in graph".
            return -1
        finally:
            # Restore on every exit path; previously a missing path left the
            # edge deleted from the caller's graph.
            if saved_attrs is not None:
                self.Graph.add_edges_from([(u, v, saved_attrs)])

    def isin_same_wcc(self):
        """Return 1 if the pair is considered to be in the same community, else 0.

        * Reverse edge ``node_2 -> node_1`` exists: 1.
        * Direct edge ``node_1 -> node_2`` exists: 1 only if ``node_2`` is still
          reachable from ``node_1`` without that edge (see
          ``shortest_path_length``), otherwise 0.
        * No edge between them: 1 if both nodes lie in the same weakly
          connected component, otherwise 0.
        """
        if self.Graph.has_edge(self.node_2, self.node_1):
            return 1

        if self.Graph.has_edge(self.node_1, self.node_2):
            # shortest_path_length() already ignores and restores the direct
            # edge, so the graph does not need to be mutated here.
            return 0 if self.shortest_path_length() == -1 else 1

        # Iterate lazily and stop at the component that contains node_1.
        for component in nx.weakly_connected_components(self.Graph):
            if self.node_1 in component:
                return 1 if self.node_2 in component else 0
        return 0

    def adar_index(self):
        """Adamic/Adar index over the successors shared by both nodes.

        Sums ``1 / log10(in-degree)`` for every common successor. The base-10
        logarithm is kept from the original implementation; it differs from
        the natural-log form only by a constant factor. Common successors with
        fewer than two predecessors are skipped because ``log10(1) == 0``
        would otherwise cause a division by zero.

        Returns 0 when either node is absent or they share no successors.
        """
        if not self._both_nodes_present():
            return 0
        shared = (
            set(self.Graph.successors(self.node_1))
            & set(self.Graph.successors(self.node_2))
        )
        total = 0
        for neighbor in shared:
            in_degree = len(list(self.Graph.predecessors(neighbor)))
            if in_degree > 1:
                total += 1 / np.log10(in_degree)
        return total

In [12]:
# Feature extractor for the ordered node pair (123, 154) on the training graph.
gd = GraphDistances(Graph=train_graph, node_1=123, node_2=154)

**Follow Back**

In [13]:
gd.follows_back()

0

**Jaccard Index**

In [14]:
gd.jaccard_followees()

0.0

In [15]:
gd.jaccard_followers()

0

**Cosine Similarity**

In [16]:
gd.cosine_followees()

0.0

In [17]:
gd.cosing_followers()

0

**Shortest Path Length**

In [18]:
gd.shortest_path_length()

-1

**Same Community Check**

In [19]:
gd.isin_same_wcc()

1

**Adamic/Adar Index**

In [20]:
gd.adar_index()

0

### Weight Features

In [22]:
class NodeWeightFeatures:
    """Degree-based weights for a single node of a directed graph.

    Both weights are ``1 / sqrt(1 + degree)``: a node with no neighbours in
    the given direction scores 1.0 and the score shrinks as the degree grows.
    """

    def __init__(self, Graph, node):
        """Remember the graph and the node whose weights are requested."""
        self.Graph, self.node = Graph, node

    @staticmethod
    def _inverse_sqrt_weight(neighbours):
        """Map an iterable of neighbours to ``1 / sqrt(1 + number of distinct neighbours)``."""
        distinct_count = len(set(neighbours))
        return 1 / np.sqrt(1 + distinct_count)

    def in_degree_weight(self):
        """Weight derived from the number of predecessors of ``node``."""
        return self._inverse_sqrt_weight(self.Graph.predecessors(self.node))

    def out_degree_weight(self):
        """Weight derived from the number of successors of ``node``."""
        return self._inverse_sqrt_weight(self.Graph.successors(self.node))

In [26]:
# Degree-weight features for node 123 of the training graph.
nwf = NodeWeightFeatures(train_graph, node=123)

In [27]:
nwf.in_degree_weight()

1.0

In [28]:
nwf.out_degree_weight()

0.5