Author: Joshua, Will, Ethan <br />
Summary: Given a grid, builds an adjacency matrix: entry (i, j) is 1 if grid squares i and j are adjacent (each square also counts as adjacent to itself) and 0 otherwise.  

In [1]:
import csv
import collections
import pandas as pd
import numpy as np
import math
import json

In [2]:
class Grid:
    """Rectangular latitude/longitude grid laid over the city of Austin.

    This is the grid object used throughout all data preprocessing. It
    represents the city as a series of equally sized cells ("regions"),
    which gives a tractable way to compute distances between cells, etc.

    Note 1: regions are counted from 0 (not 1), row by row:
    ``region = row * ncols + col``, where the row is derived from the
    latitude and the column from the longitude.
    """

    def __init__(self, grid_json):
        """Build the grid from its parsed JSON description.

        Args:
            grid_json: Mapping with the keys ``latitude_min``,
                ``longitude_min``, ``latitude_max``, ``longitude_max``,
                ``latitude_step``, ``longitude_step``, ``time_matrix`` and
                ``census_tract_region_mapping``.
        """
        self.grid = grid_json
        self.min_lat = self.grid["latitude_min"]
        self.min_lon = self.grid["longitude_min"]
        self.max_lat = self.grid["latitude_max"]
        self.max_lon = self.grid["longitude_max"]
        self.latitude_delta = self.grid["latitude_step"]
        self.longitude_delta = self.grid["longitude_step"]
        # Round up so a partial last row/column still gets its own cells.
        self.nrows = math.ceil((self.max_lat - self.min_lat) / self.latitude_delta)
        self.ncols = math.ceil((self.max_lon - self.min_lon) / self.longitude_delta)
        self.times = self.grid["time_matrix"]
        self.census_tract_region_map = self.grid["census_tract_region_mapping"]
        # Invert the tract -> regions mapping into region -> tracts.
        self.region_to_tract = collections.defaultdict(list)
        for census_tract, regions in self.census_tract_region_map.items():
            for region in regions:
                self.region_to_tract[region].append(census_tract)

    def map_point_to_region(self, latitude, longitude):
        """Return the region number of the cell containing the point.

        No bounds check is done: a point outside the bounding box yields a
        number that does not correspond to a real cell (possibly negative).
        """
        row = math.floor((latitude - self.min_lat) / self.latitude_delta)
        col = math.floor((longitude - self.min_lon) / self.longitude_delta)
        return row * self.ncols + col

    def get_representative(self, region_num):
        """Return the centre of a region as ``[longitude, latitude]``."""
        row_num, col_num = divmod(region_num, self.ncols)
        lat = self.min_lat + row_num * self.latitude_delta + 0.5 * self.latitude_delta
        lon = self.min_lon + col_num * self.longitude_delta + 0.5 * self.longitude_delta
        return [lon, lat]

    def get_time(self, region1, region2):
        """Return ``times[region1][region2]``, or -1 if either index is out of range."""
        # Negative indices would silently wrap around to the end of the
        # matrix; treat them as out of range like any other invalid region.
        if region1 < 0 or region2 < 0:
            return -1
        try:
            return self.times[region1][region2]
        except IndexError:
            return -1

    def region_to_census_tract(self, region):
        """Return the list of census tracts mapped to ``region``, or ``"0_0"`` if none.

        ``region_to_tract`` is a ``defaultdict``, so indexing it never raises
        ``KeyError`` and would insert an empty list for every unknown region.
        A membership test keeps the fallback reachable and the mapping
        unmodified.
        """
        if region in self.region_to_tract:
            return self.region_to_tract[region]
        return "0_0"

In [4]:
# Load the grid description JSON and wrap it in a Grid object (`g`), which
# the following cells use.
# Original note: using old distance matrix to get an idea of how close we are (?)
with open("../Input_Data/grid_info_3200_v2.json", "r") as f:
    grid_json = json.load(f)
g = Grid(grid_json)
# Optional spot checks, left disabled:
#g.map_point_to_region(0, 5)
#g.region_to_tract

In [5]:
# Number of cells in the full rectangular grid (rows x columns).
# NOTE(review): the recorded run prints 53 61 (3233 cells), while the
# adjacency matrix built further down has shape (3200, 3200) - so this count
# can be larger than len(g.times). Confirm which one downstream code expects.
numregions = g.nrows*g.ncols
print(g.nrows, g.ncols)

53 61


In [6]:
#adj = np.zeros((numregions, numregions), dtype=np.uint8) #this is the adjacency matrix

In [7]:
# `fun` and `funny` are a small 10x10 example for trying out writeNeighbors;
# the same function is applied to the real grid later.
fun = np.zeros((10,10), dtype=np.uint8)
# Flattened form of `fun`, indexed by region number. The recorded outputs
# show that writes made through `funny` are visible in `fun`.
funny = fun.ravel()

def writeNeighbors(reg_num, numrows, numcols, grid_index):
    """Mark region ``reg_num`` and its 4-connected neighbours in ``grid_index``.

    Regions are numbered row by row from 0, so ``reg_num`` sits at row
    ``reg_num // numcols`` and column ``reg_num % numcols``.

    Args:
        reg_num: Region whose adjacency is being written.
        numrows: Number of rows in the grid.
        numcols: Number of columns in the grid.
        grid_index: Mutable flat sequence (list or 1-D array) indexed by
            region number; entries for ``reg_num`` and its up/down/left/right
            neighbours are set to 1 in place. It may be shorter than
            ``numrows * numcols``; regions past its end are skipped.

    Returns:
        None. If ``reg_num`` is not a valid region of the grid, or has no
        slot in ``grid_index``, ``"error"`` is printed and nothing is written.
    """
    # Bounds come from the vector actually being written, not from a global,
    # so the function is correct for any grid size passed in.
    num_slots = len(grid_index)
    if not (0 <= reg_num < numrows * numcols and reg_num < num_slots):
        print("error")
        return

    row, col = divmod(reg_num, numcols)
    grid_index[reg_num] = 1

    neighbors = []
    if row > 0:                 # row-1, col
        neighbors.append(reg_num - numcols)
    if row < numrows - 1:       # row+1, col (the last row has nothing below)
        neighbors.append(reg_num + numcols)
    if col > 0:                 # row, col-1
        neighbors.append(reg_num - 1)
    if col < numcols - 1:       # row, col+1
        neighbors.append(reg_num + 1)

    for neighbor in neighbors:
        # Grid cells without a slot in the (possibly truncated) vector are skipped.
        if neighbor < num_slots:
            grid_index[neighbor] = 1

# You can change the first argument (the region number) and see how the
# adjacency marks change. Region 99 is the last cell of the 10x10 example
# (row 9, column 9).
writeNeighbors(99, 10, 10, funny)
fun

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]], dtype=uint8)

In [8]:
# Scratch check of the column test used in writeNeighbors: a region number
# that is a multiple of the column count has remainder 0.
10%10

0

In [9]:
# Mark region 0 (row 0, column 0) in the same example vector; marks written
# by earlier calls are kept.
writeNeighbors(0, 10, 10, funny)

In [10]:
# Display the 10x10 example grid with the marks written so far.
fun

array([[1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1]], dtype=uint8)

In [11]:
# Build the adjacency table: column "i" is the 0/1 neighbour vector of region i,
# with one row/column per entry of g.times.
# The matrix is filled first and wrapped in a DataFrame once; inserting thousands
# of columns one at a time is slow and fragments the frame.
n_regions = len(g.times)
adjacency = np.zeros((n_regions, n_regions), dtype=np.int64)
for i in range(n_regions):
    # adjacency[:, i] is a writable view, so the marks land directly in column i.
    writeNeighbors(i, g.nrows, g.ncols, adjacency[:, i])
data = pd.DataFrame(adjacency, columns=[str(i) for i in range(n_regions)])
data

In [124]:
# Scratch arithmetic.
# NOTE(review): the result is not used by any visible cell - candidate for removal.
14*15

210

In [12]:
# Write the adjacency table to disk as comma-separated values.
# Earlier variant, kept for reference:
#data.to_csv("../Output_Data/austin_data/adjacent_nbhd.csv", index=False)
# NOTE(review): no `fmt` is passed, so np.savetxt falls back to its default
# number format rather than plain 0/1 integers - confirm downstream readers
# expect that (otherwise consider fmt="%d").
np.savetxt("../Output_Data/austin_data_3200/adjacent_nbhd.csv", data, delimiter=',')

In [13]:
# Sanity checks on the adjacency table.
x = data.to_numpy()
print(x.shape)
print(x[2, :].sum())  # number of marks in row 2
print(x.sum())        # number of marks in the whole matrix
# Original note: "15938 should not be possible?"
# Upper bound for the total: every cell is adjacent to at most 4 neighbors
# and itself.
3233 * 5

(3200, 3200)
4
15772


16165

In [121]:
# Peek at the first ten entries of rows 2 and 3.
x[2:4,0:10]
# np.where(x[2] != 0)  # each row should sum to at most 5

array([[0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 1, 1, 0, 0, 0, 0, 0]], dtype=int64)

In [None]:
;