In [1]:
import pandas as pd
df = pd.read_csv("/home/karan/SNA/data/data.csv", sep ='\t', header=None, names = ["UserID", "ItemID", "Rating", "Timestamp"])
df.describe()

Unnamed: 0,UserID,ItemID,Rating,Timestamp
count,100000.0,100000.0,100000.0,100000.0
mean,462.48475,425.53013,3.52986,883528900.0
std,266.61442,330.798356,1.125674,5343856.0
min,1.0,1.0,1.0,874724700.0
25%,254.0,175.0,3.0,879448700.0
50%,447.0,322.0,4.0,882826900.0
75%,682.0,631.0,4.0,888260000.0
max,943.0,1682.0,5.0,893286600.0


We see that the median rating is 4*

In [2]:
import networkx as nx
from networkx.algorithms import bipartite

bip = nx.Graph()

user_list = [x for x in df["UserID"]]
item_list = [str(x) for x in df["ItemID"]]
#need to convert item IDs into strings so that the two columns have no numbers in common
#only then is the graph recognized as bipartite


#the users and items are the nodes of the two parts of the bipartite graph
coupled = [(user_list[i], item_list[i]) for i in range(len(user_list))]
#this is a list of tuples of (UserID, ItemID). They are the edges of the bipartite graph

In [3]:
#building the bipartite graph
bip.add_nodes_from(user_list, bipartite=0)
bip.add_nodes_from(item_list, bipartite=1)
bip.add_edges_from(coupled)

In [4]:
print(nx.is_connected(bip))
item_nodes, user_nodes = bipartite.sets(bip)
print(list(user_nodes)[:20])
print(list(item_nodes)[:20])

True
['150', '866', '820', '1481', '807', '1295', '1239', '469', '1383', '1191', '62', '2', '214', '1111', '1418', '73', '954', '1213', '688', '1091']
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]


In [6]:
#nx.draw(bip, with_labels = True)  
# Separate by group
pos = {}
import matplotlib.pyplot as plt

# Update position for node from each group
pos.update((node, (1, index)) for index, node in enumerate(user_nodes))
pos.update((node, (2, index)) for index, node in enumerate(item_nodes))

nx.draw(bip, pos=pos)
plt.show()

KeyboardInterrupt: 

In [5]:
user_proj = bipartite.projected_graph(bip, user_nodes)

In [6]:
user_edges = user_proj.edges()
print("The number of edges between users is", len(user_edges))

The number of edges between users is 983206


In [7]:
item_proj = bipartite.projected_graph(bip, item_nodes)

In [8]:
item_edges = item_proj.edges()
print("The number of edges between items is", len(item_edges))

The number of edges between items is 429110


Now we have two unipartite, undirected and unweighted graphs - user_proj and item_proj. We have to find the communities for users and items (movies).

In [None]:
import community
import networkx as nx
import matplotlib.pyplot as plt

partition = community.best_partition(user_proj)
print("Louvain Modularity: ", community.modularity(partition, user_proj))
print("Louvain Partition: ", partition)
size = float(len(set(partition.values())))
pos = nx.spring_layout(user_proj)
count = 0.
colors = ['r', 'b', 'g']
for com in set(partition.values()) :
    count = count + 1.
    list_nodes = [nodes for nodes in partition.keys()
                                if partition[nodes] == com]
    nx.draw_networkx_nodes(user_proj, pos, list_nodes, node_size = 20, node_color = colors[int(count)-1])


nx.draw_networkx_edges(user_proj, pos, alpha=0.5)
plt.show()

Louvain Modularity:  0.07064899868472865
Louvain Partition:  {'1528': 1, '138': 0, '913': 0, '933': 1, '1321': 1, '489': 0, '1413': 0, '455': 1, '563': 2, '4': 0, '1180': 2, '1402': 0, '482': 2, '934': 1, '71': 0, '1123': 0, '381': 2, '83': 2, '333': 1, '823': 1, '95': 0, '659': 2, '158': 0, '539': 1, '1535': 2, '845': 1, '227': 2, '1401': 0, '171': 2, '1485': 0, '1346': 2, '276': 1, '1463': 0, '709': 0, '1270': 0, '179': 0, '1149': 0, '1024': 0, '1636': 0, '1258': 1, '766': 1, '970': 2, '912': 0, '1515': 0, '1247': 2, '889': 0, '492': 0, '594': 0, '204': 0, '1164': 1, '669': 2, '1188': 2, '134': 0, '1486': 0, '1458': 0, '196': 0, '1017': 1, '1041': 2, '1312': 1, '1124': 0, '819': 1, '1064': 0, '373': 2, '515': 0, '168': 0, '1073': 2, '544': 1, '592': 2, '1271': 2, '414': 2, '88': 0, '661': 2, '895': 1, '1456': 0, '770': 0, '488': 0, '1168': 2, '1447': 0, '1187': 1, '1143': 1, '213': 0, '1566': 2, '744': 1, '410': 1, '1659': 0, '967': 0, '1446': 0, '1179': 2, '611': 0, '22': 0, '1228':

In [None]:
import community
import networkx as nx
import matplotlib.pyplot as plt

partition = community.best_partition(item_proj)
print("Louvain Modularity: ", community.modularity(partition, item_proj))
print("Louvain Partition: ", partition)
size = float(len(set(partition.values())))
pos = nx.spring_layout(item_proj)
count = 0.
colors = ['r', 'b', 'g']
for com in set(partition.values()) :
    count = count + 1.
    list_nodes = [nodes for nodes in partition.keys()
                                if partition[nodes] == com]
    nx.draw_networkx_nodes(item_proj, pos, list_nodes, node_size = 20, node_color = colors[int(count)-1])


nx.draw_networkx_edges(item_proj, pos, alpha=0.5)
plt.show()

Louvain Modularity:  0.013070821174306968
Louvain Partition:  {1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 0, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 0, 16: 1, 17: 0, 18: 1, 19: 0, 20: 1, 21: 0, 22: 1, 23: 1, 24: 0, 25: 1, 26: 0, 27: 0, 28: 1, 29: 0, 30: 0, 31: 0, 32: 0, 33: 0, 34: 0, 35: 0, 36: 0, 37: 1, 38: 0, 39: 0, 40: 0, 41: 1, 42: 1, 43: 1, 44: 1, 45: 1, 46: 0, 47: 0, 48: 0, 49: 1, 50: 0, 51: 1, 52: 0, 53: 1, 54: 0, 55: 1, 56: 1, 57: 0, 58: 1, 59: 1, 60: 1, 61: 0, 62: 1, 63: 0, 64: 1, 65: 1, 66: 0, 67: 1, 68: 0, 69: 0, 70: 1, 71: 1, 72: 1, 73: 1, 74: 0, 75: 1, 76: 1, 77: 1, 78: 0, 79: 0, 80: 1, 81: 1, 82: 1, 83: 0, 84: 0, 85: 1, 86: 0, 87: 1, 88: 0, 89: 1, 90: 1, 91: 0, 92: 1, 93: 1, 94: 1, 95: 1, 96: 1, 97: 1, 98: 1, 99: 0, 100: 0, 101: 1, 102: 1, 103: 1, 104: 1, 105: 0, 106: 1, 107: 0, 108: 1, 109: 1, 110: 0, 111: 0, 112: 0, 113: 0, 114: 1, 115: 1, 116: 1, 117: 1, 118: 1, 119: 1, 120: 0, 121: 1, 122: 1, 123: 1, 124: 1, 125: 1, 126: 0, 127: 0, 128: 1, 129: 0, 130: 1, 131: 

In [18]:
print(len(item_proj))
print(len(user_proj))

1682
943


In [None]:
#Girvan-Newman
"""
from networkx.algorithms.community.centrality import girvan_newman
import itertools
k = 1
comp = girvan_newman(user_proj)
limited = itertools.takewhile(lambda c: len(c) <= k, comp)
for communities in limited:
    print(tuple(sorted(c) for c in communities)) 
"""