In [1]:
import sys
import os.path
import networkx as nx
import csv
# Directory holding the Foursquare dump. The default is the original
# machine-specific location; set the FOURSQUARE_DATA_DIR environment variable
# to point elsewhere instead of editing this cell.
DATA_DIR = os.environ.get(
    "FOURSQUARE_DATA_DIR",
    "/Users/kaho/projects/data_l109/small_foursquare_dataset")

# Read the transitions CSV into a list of dicts, one per row. The endpoints
# are converted to int; the two time columns are kept as the raw strings read
# from the file.
with open(os.path.join(DATA_DIR, "new_york_placenet_transitions.txt"), 'r') as f:
    transit = [{"from": int(row["from"]),
                "to": int(row["to"]),
                "time_from": row["time_from"],
                "time_to": row["time_to"]}
               for row in csv.DictReader(f)]
# Map location id -> attribute dict parsed from the location dump.
# Each line is split on "*;*" into an id and a record. The record has its
# first character and its last two characters dropped and is then split on
# commas; fields 2-5 additionally have one character stripped from each end.
# NOTE(review): presumably each line looks like
#   <id>*;*(lat, lon, 'type', 'users', 'checkins', 'name')\n
# -- confirm against the data file. A name containing a comma would be cut at
# the first comma by this split.
locations = {}
with open(os.path.join(DATA_DIR, "new_york_anon_locationData_newcrawl.txt"), 'r') as f:
    for row in f:
        if not row.strip():
            # A blank line has no "*;*" separator and would raise IndexError.
            continue
        id_ent = row.split("*;*")
        # A list (not map()) because the fields are indexed below; on
        # Python 3 map() returns an iterator that cannot be subscripted.
        fields = [x.strip() for x in id_ent[1][1:-2].split(",")]
        # The "longtitude" key spelling is kept as-is: other cells read it.
        locations[int(id_ent[0])] = {"latitude": float(fields[0]),
                                     "longtitude": float(fields[1]),
                                     "type": fields[2][1:-1],
                                     "total_users": int(fields[3][1:-1]),
                                     "total_checkins": int(fields[4][1:-1]),
                                     "name": fields[5][1:-1]}

In [2]:
from math import pow
def compute_distance(edge, nodes):
    """Return the straight-line distance between the two endpoints of an edge.

    edge  -- mapping with "from" and "to" keys naming the endpoint nodes.
    nodes -- mapping from node id to a dict that holds "latitude" and
             "longtitude" (sic) numeric values.

    The result is the square root of the sum of the squared coordinate
    differences, expressed in the same units as the stored coordinates.
    Raises KeyError if an endpoint or a coordinate key is missing.
    """
    src = nodes[edge["from"]]
    dst = nodes[edge["to"]]
    d_lon = src["longtitude"] - dst["longtitude"]
    d_lat = src["latitude"] - dst["latitude"]
    squared_sum = pow(d_lon, 2) + pow(d_lat, 2)
    return pow(squared_sum, 0.5)

In [3]:
# Directed transition graph: one node per known location (carrying the
# location's attributes) and one edge per transition, weighted by the
# coordinate distance between its endpoints.
g = nx.DiGraph()
for k, v in locations.items():
    # Keyword expansion instead of a positional attribute dict: the positional
    # form was removed in networkx 2.0, while **attrs works on 1.x and later.
    g.add_node(k, **v)
for t in transit:
    # Coordinates are looked up in `locations` directly; the graph's node
    # attributes are the same values, and the `g.node` accessor no longer
    # exists in recent networkx releases.
    g.add_edge(t["from"], t["to"], weight=compute_distance(t, locations), time_from=t["time_from"], time_to=t["time_to"])

In [4]:
import community

In [5]:
# Empty undirected graph; its nodes and edges are added in the following cell.
g_non = nx.Graph()

In [6]:
# Populate the undirected graph with the same nodes and edges as the directed
# one: one node per known location, one edge per transition.
for k, v in locations.items():
    # Keyword expansion works on networkx 1.x and later; the positional
    # attribute dict was removed in networkx 2.0.
    g_non.add_node(k, **v)
for t in transit:
    # Distances come straight from `locations`, so this cell does not depend
    # on the separate directed graph `g` having been built first.
    g_non.add_edge(t["from"], t["to"], weight=compute_distance(t, locations), time_from=t["time_from"], time_to=t["time_to"])

In [7]:
# Run community detection on the undirected graph and keep the returned mapping.
# NOTE(review): every edge carries a "weight" attribute holding the coordinate
# distance. If best_partition uses that attribute as the edge weight (believed
# to be python-louvain's default -- confirm), more distant venues are treated
# as more strongly tied. Confirm this is intended.
# NOTE(review): no random seed is passed here, so the result may differ
# between runs -- confirm whether reproducibility matters.
partition = community.best_partition(g_non)

In [8]:
# Display the full mapping returned by best_partition (one entry per graph node).
partition

{0: 0,
 1: 1,
 2: 2,
 3: 3,
 4: 0,
 5: 4,
 6: 4,
 7: 5,
 8: 6,
 9: 7,
 10: 0,
 11: 1,
 12: 8,
 13: 6,
 14: 9,
 15: 5,
 16: 10,
 17: 0,
 18: 11,
 19: 8,
 20: 0,
 21: 12,
 22: 13,
 23: 6,
 24: 0,
 25: 4,
 26: 8,
 27: 4,
 28: 14,
 29: 0,
 30: 15,
 31: 4,
 32: 16,
 33: 8,
 34: 0,
 35: 14,
 36: 5,
 37: 0,
 38: 17,
 39: 18,
 40: 19,
 41: 0,
 42: 8,
 43: 0,
 44: 4,
 45: 20,
 46: 8,
 47: 6,
 48: 0,
 49: 21,
 50: 14,
 51: 22,
 52: 14,
 53: 0,
 54: 23,
 55: 24,
 56: 13,
 57: 0,
 58: 25,
 59: 25,
 60: 5,
 61: 17,
 62: 23,
 63: 8,
 64: 17,
 65: 5,
 66: 0,
 67: 26,
 68: 5,
 69: 5,
 70: 3,
 71: 21,
 72: 0,
 73: 0,
 74: 1,
 75: 4,
 76: 18,
 77: 8,
 78: 0,
 79: 8,
 80: 27,
 81: 28,
 82: 8,
 83: 8,
 84: 23,
 85: 4,
 86: 4,
 87: 0,
 88: 29,
 89: 1,
 90: 0,
 91: 30,
 92: 5,
 93: 0,
 94: 0,
 95: 10,
 96: 3,
 97: 1,
 98: 1,
 99: 4,
 100: 31,
 101: 17,
 102: 18,
 103: 25,
 104: 31,
 105: 8,
 106: 31,
 107: 0,
 108: 7,
 109: 5,
 110: 1,
 111: 11,
 112: 32,
 113: 14,
 114: 21,
 115: 0,
 116: 4,
 117: 1,
 118:

In [9]:
# Save the partition to partition.txt in the working directory, one
# "<key> <value>" line per entry of the mapping.
with open("partition.txt", 'w') as f:
    # .items() rather than .iteritems(): the latter exists only on Python 2.
    for n, p in partition.items():
        f.write("{} {}\n".format(n, p))