In [2]:
import networkx as nx
from itertools import combinations
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import fileDefinitions as fd
import utils.helper_methods as helper_methods
from networkx.algorithms import bipartite
import time


In [6]:


try:
    # gph = nx.read_gexf(fd.fullyConnGraphFile)
    # helper_methods.logData("graph file found at " + str())
    gph = nx.read_gexf("./fullyConnGraph.gexf")
except Exception as e:
    helper_methods.logData(e)
    gph = nx.Graph()
    helper_methods.logData("new graph file generated")


In [7]:

B = gph
# users = []
# issues = []
# for n in B:
#     if(n == 0):
#         users.append(n)
#     else:
#         issues.append(n)
users = [n for n, d in B.nodes(data=True) if d["bipartite"] == 0]
issues = [n for n, d in B.nodes(data=True) if d["bipartite"] == 1]


In [7]:


# create adjacency matrices for the users and issues
users_matrix = nx.bipartite.biadjacency_matrix(B, row_order=users)
issues_matrix = nx.bipartite.biadjacency_matrix(B, row_order=issues)




In [8]:
print(issues_matrix[:,[1]])

  (2969, 0)	1
  (2970, 0)	1
  (2971, 0)	1
  (2972, 0)	1
  (2977, 0)	1
  (2984, 0)	1
  (2990, 0)	1
  (2994, 0)	1


In [9]:


def jaccard_coefficient(graph, node1, node2):
    neighbors1 = set(graph.neighbors(node1))
    neighbors2 = set(graph.neighbors(node2))
    intersection = neighbors1.intersection(neighbors2)
    union = neighbors1.union(neighbors2)
    if len(union) == 0:
        return 0
    return len(intersection) / len(union)

In [23]:


def jaccard_link_prediction(graph):
    predicted_edges = []
    count = 10000
    userCount = 0
    for node1 in users:
        
        userCount += 1
        if(userCount%100 == 0):
            print(userCount)
        for node2 in issues:
            
            if node1 == node2 or graph.has_edge(node1, node2):
                continue
            
            jaccard = jaccard_coefficient(graph, node1, node2)
            if(jaccard != 0):
                predicted_edges.append((node1, node2, jaccard))
                count -= 1
                if(count == 0):
                    print(node1,end="-------")
                    print(node2, end="-------")
                    print(jaccard)
                    count = 10000
                    
    return sorted(predicted_edges, key=lambda x: x[2], reverse=True)


In [24]:
print(B)
print(len(users))
print(len(issues))


Graph with 11406 nodes and 1391375 edges
3126
8280


In [25]:
print("Node1 ------ Node2 ------ Jacord Coeff")
print(jaccard_link_prediction(B))

Node1 ------ Node2 ------ Jacord Coeff
stuartmorgan-------PR_kwDOCGzUbs5JeGV7-------0.0007012622720897616
cpswan-------MDExOlB1bGxSZXF1ZXN0Mzk3MTI4NTE5-------0.0006711409395973154
GIfatahTH-------MDExOlB1bGxSZXF1ZXN0Mzg1NDQwMTcw-------0.0006557377049180328
aouahib-------PR_kwDOCGzUbs5KXmy--------0.0006514657980456026
mazihao66-------MDExOlB1bGxSZXF1ZXN0NDk1OTU2MTI1-------0.000700770847932726
Wanchen7-------MDExOlB1bGxSZXF1ZXN0MzQzODYzMjY5-------0.002036659877800407
EightMinuteYouth-------PR_kwDOB40ng85I_BMH-------0.0013477088948787063
gendseo-------PR_kwDOD0USHc5LIk5j-------0.0006765899864682003
mishrabhilash-------PR_kwDOD0USHc5KP8qy-------0.0006993006993006993
socialmad-------MDExOlB1bGxSZXF1ZXN0NTcwMTYzODEw-------0.0006978367062107466
SakurasDuck-------PR_kwDOEG4Joc49HsRQ-------0.000657030223390276
chejdj-------MDExOlB1bGxSZXF1ZXN0NDk3Mjg4NjE5-------0.0006724949562878278
mridul-dhiman-------MDExOlB1bGxSZXF1ZXN0Mjc4MDI3MTc5-------0.0006540222367560497
100
Mohit-Joshi-dev-------PR_kwD

KeyboardInterrupt: 

In [35]:
# preferential attachment 
def prefAttachment(graph):
    predicted_edges = []
    count = 10000
    userCount = 0
    for node1 in users:
        userCount += 1
        if(userCount%100 == 0):
            print(userCount)
        for node2 in issues:
            if node1 == node2 or graph.has_edge(node1, node2):
                continue
            score = nx.preferential_attachment(graph, (node1,node2))
            for u,v, p in score:
                print(f"({u}, {v}) -> {p}")
            # if(score != 0):
            #     predicted_edges.append((node1, node2, score))
            #     count -= 1
            #     if(count == 0):
            #         print(node1,end="-------")
            #         print(node2, end="-------")
            #         print(score)
                    # count = 10000
                    
    return sorted(predicted_edges, key=lambda x: x[2], reverse=True)


In [None]:
prefAttachment(B)

In [None]:

def adamic_adar_coefficient(graph, node1, node2):
    neighbors1 = set(graph.neighbors(node1))
    neighbors2 = set(graph.neighbors(node2))
    common_neighbors = neighbors1.intersection(neighbors2)
    aa = sum([1 / math.log(len(set(graph.neighbors(neighbor)))) for neighbor in common_neighbors])
    return aa

def adamic_adar_link_prediction(graph):
    predicted_edges = []
    for node1 in graph.nodes():
        for node2 in graph.nodes():
            if node1 == node2 or graph.has_edge(node1, node2):
                continue
            aa = adamic_adar_coefficient(graph, node1, node2)
            predicted_edges.append((node1, node2, aa))
    return sorted(predicted_edges, key=lambda x: x[2], reverse=True)

In [None]:

# compute the Adamic-Adar similarity between all pairs of users and issues
aa_matrix = np.zeros((len(users), len(issues)))
for u, i in combinations(range(len(users)), 2):
    u_vec = users_matrix[u].toarray()[0]
    i_vec = issues_matrix[i].toarray()[0]
    common = np.where(np.logical_and(u_vec, i_vec))[0]
    aa = 0
    for c in common:
        aa += 1 / np.log(len(np.where(users_matrix[:, c].toarray())[0]))
    aa_matrix[u][i] = aa
    aa_matrix[i][u] = aa


In [49]:
def katz(gph, set1, set2, alpha, beta, k):
    A = nx.bipartite.biadjacency_matrix(gph, row_order = set1, column_order = set2)
    m, n = A.shape
    A_L = nx.bipartite.projected_graph(gph, set1)
    A_R = nx.bipartite.projected_graph(gph, set2)
    A_pred = np.zeros((m, n))

    for i in range(k):
        # Calculate the Katz centrality of the left and right projection
        K_L = alpha * np.dot(A_L, K_L) + beta * A_L
        K_R = alpha * np.dot(A_R, K_R) + beta * A_R

        # Calculate the predicted adjacency matrix using the Katz scores
        A_pred = np.dot(K_L, K_R.T)

    return A_pred


def katz_bipartite_link_prediction(adj_matrix, beta, max_iterations, convergence_threshold=1e-6):

    # Compute the maximum eigenvalue of the adjacency matrix
    max_eigval = np.linalg.eigvals(adj_matrix).max()

    # Compute the Katz scores using the formula: (I - beta * A)^-1 - I
    katz_matrix = np.linalg.inv(np.eye(adj_matrix.shape[0]) - beta * adj_matrix) - np.eye(adj_matrix.shape[0])

    # Scale the Katz scores by powers of beta up to the maximum eigenvalue
    for i in range(max_iterations):
        katz_matrix *= beta
        if np.abs(katz_matrix).max() / max_eigval < convergence_threshold:
            break

    return katz_matrix

A = nx.bipartite.biadjacency_matrix(gph, row_order = users, column_order = issues)

adj_array = np.asarray(A.toarray())

# print(adj_array)

print(katz_bipartite_link_prediction(adj_array,0.1,1))






LinAlgError: Last 2 dimensions of the array must be square