In [2]:
import pandas as pd
import networkx as nx
import numpy as np

In [3]:
edges_dict = {}
with open('data/ca-CondMat.txt', "r") as f:
    lines = f.readlines()
    for line in lines[4:]:
        data = line.strip().split("\t")
        if (int(data[0]), int(data[1])) in edges_dict:
            edges_dict[(int(data[0]), int(data[1]))] += 1
        elif (int(data[1]), int(data[0])) in edges_dict:
            edges_dict[(int(data[1]), int(data[0]))] += 1
        else:
            edges_dict[(int(data[0]), int(data[1]))] = 1
        
edges = [(k[0], k[1], v) for k, v in edges_dict.items()]

G = nx.Graph()
G.add_weighted_edges_from(edges)

print("CONDMAT")
print("# nodes:", G.number_of_nodes())
print("# edges:", G.number_of_edges())
print("avg degree:", np.mean([x[1] for x in list(G.degree())]))
print("density:", nx.density(G))

CONDMAT
# nodes: 23133
# edges: 93497
avg degree: 8.083430596982666
density: 0.0003494479766981958


In [7]:
edges_dict = {}
with open('data/datasets/mit/out.mit', "r") as f:
    lines = f.readlines()
    for line in lines[1:]:
        data = line.strip().split(" ")
        data = [data[0]] + data[1].split("\t")
        if (int(data[0]), int(data[1])) in edges_dict:
            edges_dict[(int(data[0]), int(data[1]))] += 1
        elif (int(data[1]), int(data[0])) in edges_dict:
            edges_dict[(int(data[1]), int(data[0]))] += 1
        else:
            edges_dict[(int(data[0]), int(data[1]))] = 1
        
edges = [(k[0], k[1], v) for k, v in edges_dict.items()]

G = nx.Graph()
G.add_weighted_edges_from(edges)

print("Reality Mining")
print("# nodes:", G.number_of_nodes())
print("# edges:", G.number_of_edges())
print("avg degree:", np.mean([x[1] for x in list(G.degree())]))
print("density:", nx.density(G))

Reality Mining
# nodes: 96
# edges: 2539
avg degree: 52.895833333333336
density: 0.5567982456140351


In [4]:
edges_dict = {}
with open('data/datasets/topology/out.topology', "r") as f:
    lines = f.readlines()
    for line in lines[1:]:
        data = line.strip().split(" ")
        if (int(data[0]), int(data[1])) in edges_dict:
            edges_dict[(int(data[0]), int(data[1]))] += 1
        elif (int(data[1]), int(data[0])) in edges_dict:
            edges_dict[(int(data[1]), int(data[0]))] += 1
        else:
            edges_dict[(int(data[0]), int(data[1]))] = 1
        
edges = [(k[0], k[1], v) for k, v in edges_dict.items()]

G = nx.Graph()
G.add_weighted_edges_from(edges)

print("Topology")
print("# nodes:", G.number_of_nodes())
print("# edges:", G.number_of_edges())
print("avg degree:", np.mean([x[1] for x in list(G.degree())]))
print("density:", nx.density(G))

Topology
# nodes: 34761
# edges: 107720
avg degree: 6.197750352406432
density: 0.0001783012184236603


In [5]:
edges_dict = {}
users = set()
tags = set()
with open('data/datasets/munmun_twitterex_ut/out.munmun_twitterex_ut', "r") as f:
    lines = f.readlines()
    for line in lines[1:]:
        data = line.strip().split(" ")
        user = 'user' + data[0]
        tag = 'tag' + data[1]
        if len(users) < 1000 or user in users:
            if len(tags) < 10000 or tag in tags:
                users.add(user)
                tags.add(tag)
                
                if (user, tag) not in edges_dict:
                    edges_dict[(user, tag)] = 1
                else:
                    edges_dict[(user, tag)] += 1
 
edges = [(k[0], k[1], v) for k, v in edges_dict.items()]

G = nx.Graph()
G.add_weighted_edges_from(edges)

print("Twitter")
print("# nodes:", G.number_of_nodes())
print("# edges:", G.number_of_edges())
print("avg degree:", np.mean([x[1] for x in list(G.degree())]))
print("density:", nx.density(G))

Twitter
# nodes: 11000
# edges: 18173
avg degree: 3.304181818181818
density: 0.00030040747505971616


In [6]:
edges_dict = {}
with open('data/datasets/prosper-loans/out.prosper-loans', "r") as f:
    lines = f.readlines()
    for line in lines[1:]:
        data = line.strip().split(" ")
        data = [data[0]] + data[1].split("\t")
        if (int(data[0]), int(data[1])) in edges_dict:
            edges_dict[(int(data[0]), int(data[1]))] += 1
        elif (int(data[1]), int(data[0])) in edges_dict:
            edges_dict[(int(data[1]), int(data[0]))] += 1
        else:
            edges_dict[(int(data[0]), int(data[1]))] = 1
        
edges = [(k[0], k[1], v) for k, v in edges_dict.items()]

G = nx.DiGraph()
G.add_weighted_edges_from(edges)

print("Prosper Loans")
print("# nodes:", G.number_of_nodes())
print("# edges:", G.number_of_edges())
print("avg degree:", np.mean([x[1] for x in list(G.degree())]))
print("density:", nx.density(G))

Prosper Loans
# nodes: 89269
# edges: 3330022
avg degree: 74.60645912914897
density: 0.00041787907833237543
