In [5]:
import pandas as pd
import geopandas as gpd
import numpy as np
import time 
import os
from ParseJsons import check_box
from multiprocessing import Process
from geopandas.geoseries import Point




def read_venues_lsoa(outfolder, city):
    """Group venue ids by the LSOA id listed next to them.

    Reads ``<outfolder>/venues_info/venues_lsoa_full.dat``, a tab-separated
    file in which column 0 holds the venue id and column 3 holds the LSOA id.
    Any line containing the substring ``'lsoa'`` is treated as a header and
    skipped, as are blank lines.

    Parameters
    ----------
    outfolder : str
        Directory that contains the ``venues_info`` sub-folder.
    city : str
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    dict
        Maps each LSOA id to the list of venue ids found for it, in file order.

    Raises
    ------
    IndexError
        If a non-blank data line has fewer than four tab-separated fields.
    """
    lsoa_venues = {}
    infile      = outfolder + '/venues_info/venues_lsoa_full.dat'

    # The context manager closes the handle even if a malformed line raises.
    with open(infile) as handle:
        for line in handle:
            stripped = line.strip()

            # A blank line (e.g. a trailing newline) carries no record, and a
            # line mentioning 'lsoa' is the header.
            if not stripped or 'lsoa' in line:
                continue

            fields = stripped.split('\t')
            venue, lsoa = fields[0], fields[3]
            lsoa_venues.setdefault(lsoa, []).append(venue)

    return lsoa_venues



def read_users_lsoa(outfolder, city):
    """Group user ids by the LSOA id listed next to them.

    Reads ``<outfolder>/user_info/user_lsoas.dat``, a two-column tab-separated
    file of ``user<TAB>lsoa`` records. Any line containing the substring
    ``'lsoa'`` is treated as a header and skipped, as are blank lines.

    Parameters
    ----------
    outfolder : str
        Directory that contains the ``user_info`` sub-folder.
    city : str
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    dict
        Maps each LSOA id to the list of user ids found for it, in file order.

    Raises
    ------
    ValueError
        If a non-blank data line does not split into exactly two fields.
    """
    lsoa_users = {}
    infile     = outfolder + '/user_info/user_lsoas.dat'

    # The context manager closes the handle even if a malformed line raises.
    with open(infile) as handle:
        for line in handle:
            stripped = line.strip()

            # A blank line (e.g. a trailing newline) carries no record, and a
            # line mentioning 'lsoa' is the header.
            if not stripped or 'lsoa' in line:
                continue

            user, lsoa = stripped.split('\t')
            lsoa_users.setdefault(lsoa, []).append(user)

    return lsoa_users


def get_venues_users(outfolder, city):
    """Load the list of users recorded for every venue.

    Reads ``<outfolder>/venues_info/<city>_venues_users.dat``. Each line is
    tab-separated: the first field is the venue id and every remaining field
    is a user id. Blank lines are skipped so they do not create an entry for
    an empty venue id.

    Parameters
    ----------
    outfolder : str
        Directory that contains the ``venues_info`` sub-folder.
    city : str
        City name used as the file-name prefix.

    Returns
    -------
    dict
        Maps each venue id to its list of user ids (possibly empty). If a
        venue id occurs on several lines, the last line wins.
    """
    t1 = time.time()
    print('Getting venues user list...')
    venues_users = {}

    infile = outfolder + '/venues_info/' + city + '_venues_users.dat'

    # The context manager guarantees the handle is released after parsing.
    with open(infile) as handle:
        for line in handle:
            stripped = line.strip()
            if not stripped:
                continue

            fields = stripped.split('\t')
            venues_users[fields[0]] = fields[1:]

    print('Venues user lists parsed\t', time.time() - t1)
    return venues_users
 
    
    
def get_edge_weights2(city, outfolder, venues_users, lsoa_venues):
    """Build the weighted venue-similarity edge list within each LSOA.

    Two venues are linked when they are listed under the same LSOA and share
    at least one user; the edge weight is the number of distinct shared users.

    Parameters
    ----------
    city : str
        Not used by this function; kept so existing callers keep working.
    outfolder : str
        Not used by this function; kept so existing callers keep working.
    venues_users : dict
        Maps a venue id to an iterable of user ids.
    lsoa_venues : dict
        Maps an LSOA id to an iterable of venue ids.

    Returns
    -------
    dict
        Maps ``'<venueA>_<venueB>'`` (the two ids sorted, joined by ``'_'``)
        to the positive number of shared users. Pairs with no shared user are
        omitted.

    Raises
    ------
    KeyError
        If a venue taking part in a pair is missing from ``venues_users``.
    """
    t1 = time.time()
    edges_weights2 = {}
    print('Parsing venue similarity network edge list...')

    # Convert every user list to a set once, instead of rebuilding both sets
    # for every single venue pair.
    user_sets = {}
    for venue, users in venues_users.items():
        user_sets[venue] = set(users)

    for venues in lsoa_venues.values():
        venues = list(venues)

        # The weight is symmetric and the key is order-independent, so each
        # unordered pair only needs to be visited once.
        for pos, v1 in enumerate(venues):
            for v2 in venues[pos + 1:]:
                # A venue listed twice in the same LSOA must not pair with itself.
                if v1 == v2:
                    continue

                w = len(user_sets[v1] & user_sets[v2])
                if w > 0:
                    edges_weights2['_'.join(sorted([v1, v2]))] = w

    print('Venues similarity network edges parsed\t', time.time() - t1)
    return edges_weights2
    

In [6]:
# Run configuration. eps / mins / LIMIT_num are only used here to build the
# `infile` name below (presumably the DBSCAN parameters of the user-home
# clustering step -- TODO confirm).
eps       = 0.01
mins      = 3
LIMIT_num = 0
city      = 'london'
# Root folder of the processed data for `city`; passed to every loader below.
outfolder = '../ProcessedData/' + city + '/'
# NOTE(review): `outroot` is not defined in any cell visible in this notebook,
# so this line raises NameError on a fresh kernel unless it is defined
# elsewhere -- confirm the intended root (possibly `outfolder`?). `infile` is
# not read by any of the visible cells.
infile    = outroot + '/user_homes/centroids_filtered/' + city + '_user_homes_dbscan_' + str(eps) + '_' + str(mins) + '_' + str(LIMIT_num) + '_filtered.dat'



# Load the LSOA -> venues / LSOA -> users groupings and the venue -> users
# lists, then derive the within-LSOA venue similarity edges from them.
lsoa_venues   = read_venues_lsoa(outfolder, city)
lsoa_users    = read_users_lsoa(outfolder, city)
venues_users  = get_venues_users(outfolder, city)
edges_weights = get_edge_weights2(city, outfolder, venues_users, lsoa_venues) 

Getting venues user list...
('Venues user lists parsed\t', 2.586289167404175)


Parsing venue similarity network edge list...
('Venues similarity network edges parsed\t', 111.57537603378296)


In [9]:
def get_node_edge_list(edges_weights):
    """Turn a weighted edge dictionary into per-node neighbour lists.

    Parameters
    ----------
    edges_weights : dict
        Maps an edge key of the form ``'<node1>_<node2>'`` to its weight.
        Each key must split on ``'_'`` into exactly two parts.

    Returns
    -------
    dict
        Maps every node to a list of ``(neighbour, weight)`` tuples, one per
        edge the node takes part in.
    """
    started = time.time()
    print('Listing each nodes neighbours and those edge weights...')

    neighbours = {}

    for edge_key, weight in edges_weights.items():
        first, second = edge_key.split('_')

        # Register the edge under both endpoints, first endpoint first.
        for node, other in ((first, second), (second, first)):
            neighbours.setdefault(node, []).append((other, weight))

    print('Neighbour list created\t', time.time() - started)
    return neighbours
    




559481