In [102]:
import numpy as np
import json
import csv
import pandas as pd
import random
import math
from decimal import Decimal

In [216]:
# function to read predictions from data file
# depending on the method/strategy specified:
# "simple", "random", or "uniform"
# returns list of tuples of lat/lon rat instances
# to be fed into the generate_geojson_from_pts function

def read_predictions(filename, method_str):
    """Read a predictions CSV and expand every row into point instances ("rats").

    Parameters
    ----------
    filename : str or path-like
        CSV file passed to ``pd.read_csv``; it must contain the columns
        ``"ppm*100"`` and ``"Grids"``.
    method_str : str
        Placement strategy: ``"simple"`` (every instance at the cell centre),
        ``"random"`` (delegates to ``random_rats``) or ``"uniform"``
        (delegates to ``uniform_rats``).

    Returns
    -------
    list of tuple
        ``(lon, lat)`` pairs, ready for ``generate_geojson_from_pts``.

    Raises
    ------
    ValueError
        If ``method_str`` is not one of the three supported strategies.
    """
    # fail loudly on a misspelled strategy instead of returning an empty list
    if method_str not in ("simple", "random", "uniform"):
        raise ValueError(
            'method_str must be "simple", "random" or "uniform", got %r' % (method_str,)
        )

    # read predicted data file
    predictions = pd.read_csv(filename)

    # list of (lon, lat) tuples, one per generated instance
    points = []

    for ppm_raw, grid_str in zip(predictions["ppm*100"], predictions["Grids"]):
        # NOTE(review): the column is already named "ppm*100" and is scaled by
        # another factor of 100 here -- confirm this is intended.
        ppm = int(100 * ppm_raw)  # <-- make sure name of column is right
        lat, long = get_center(parse_str(grid_str))

        if method_str == "simple":
            # ppm copies of the cell centre (none when ppm <= 0)
            points.extend([(long, lat)] * ppm)
        elif method_str == "random":
            lats, lons = random_rats(ppm, lat, long)
            points.extend(zip(lons, lats))
        else:  # "uniform"
            lats, lons = uniform_rats(ppm, lat, long)
            points.extend(zip(lons, lats))

    return points


In [214]:
# alternative to the function above, created in order to avoid 
# parsing error, still not resolved...

# function to read predictions from data file
# uses a uniform rat instance distribution
# returns list of tuples of lat/lon rat instances
# to be fed into the generate_geojson_from_pts function

def read_predictions_uniform(filename, *, scale=5):
    """Read a predictions CSV and place evenly spaced points inside each cell.

    For every row the cell bounds are taken directly from the parsed "Grids"
    vertices (vertex 0 supplies the minimum lat/lon, vertex 1 the maximum
    lon, vertex 2 the maximum lat).  ``int(sqrt(|ppm|))`` evenly spaced
    latitudes and longitudes are generated with ``np.linspace`` and paired
    index-wise, so each row contributes that many ``(lon, lat)`` points.

    Parameters
    ----------
    filename : str or path-like
        CSV file passed to ``pd.read_csv``; it must contain the columns
        ``"ppm*100"`` and ``"Grids"``.
    scale : number, keyword-only, default 5
        Multiplier applied to the ``"ppm*100"`` value before it is truncated
        to an integer instance count.

    Returns
    -------
    list of tuple
        ``(lon, lat)`` pairs, ready for ``generate_geojson_from_pts``.
    """
    # read predicted data file
    predictions = pd.read_csv(filename)

    # list of (lon, lat) tuples, one per generated instance
    points = []

    for ppm_raw, grid_str in zip(predictions["ppm*100"], predictions["Grids"]):
        ppm = int(scale * ppm_raw)  # <-- make sure name of column is right

        vertices = parse_str(grid_str)
        lat_min = vertices[0][1]
        lat_max = vertices[2][1]
        lon_min = vertices[0][0]
        lon_max = vertices[1][0]

        # abs() keeps math.sqrt from raising on a negative reading
        count = int(math.sqrt(abs(ppm)))
        lats = np.linspace(lat_min, lat_max, num=count)
        lons = np.linspace(lon_min, lon_max, num=count)

        # pair the i-th longitude with the i-th latitude
        points.extend(zip(lons, lats))

    return points


In [217]:
# function from Weiwei
# creates GeoJSON from array of tuples containing
# all lat/lon "rat instances"

def generate_geojson_from_pts(pts):
    """Build a GeoJSON-style FeatureCollection dict from longitude/latitude points.

    Every element of ``pts`` is read as ``pt[0]``, ``pt[1]`` and turned into a
    Point feature whose ``id`` is the element's position in ``pts`` and whose
    ``properties`` dict is empty.
    """
    def as_feature(index, point):
        # one Point feature per input position
        return {
            "type": "Feature",
            "id": index,
            "properties": {},
            "geometry": {"type": "Point", "coordinates": [point[0], point[1]]},
        }

    return {
        "type": "FeatureCollection",
        "features": [as_feature(position, pt) for position, pt in enumerate(pts)],
    }

# sample [longitude, latitude] points, converted and written to data.geojson
pts = [
    [-71.157609, 42.355988],
    [-71.05729, 42.36571],
    [-71.0047, 42.38914],
]
geojson = generate_geojson_from_pts(pts)

with open('data.geojson', 'w') as out_file:
    json.dump(geojson, out_file)

In [219]:
# grid definition: bounding box, cell counts and the resulting tick arrays

x = [-71.20197, -70.96679]   # extent along x (compared against longitudes)
y = [42.291441, 42.420578]   # extent along y (compared against latitudes)
x_cell = y_cell = 50

x_min, x_max = x
y_min, y_max = y

# one more tick than cells along each axis
x_s = np.linspace(x_min, x_max, num=x_cell + 1)
y_s = np.linspace(y_min, y_max, num=y_cell + 1)

In [220]:
# given lat/lon spot
# return indices for lat/lon grid box bounds
# where bounds are:
# y_s[lat_index - 1], y_s[lat_index]
# x_s[lon_index - 1], x_s[lon_index]
def _upper_tick_index(ticks, value):
    """Return i such that ticks[i - 1] and ticks[i] bracket value, clamped to the grid."""
    # searchsorted gives the first index whose tick is >= value (ascending ticks)
    idx = int(np.searchsorted(ticks, value))
    # clamp so that ticks[idx - 1] and ticks[idx] are always a real, adjacent pair
    return min(max(idx, 1), len(ticks) - 1)


def find_gridbox(lat, long, x_ticks=None, y_ticks=None):
    """Locate the grid cell containing a latitude/longitude position.

    Parameters
    ----------
    lat, long : float
        Position to look up.
    x_ticks, y_ticks : sequence of float, optional
        Ascending tick positions along x (longitude) and y (latitude).
        They default to the module-level ``x_s`` and ``y_s`` arrays.

    Returns
    -------
    tuple of int
        ``(lat_index, lon_index)`` such that the cell is bounded by
        ``y_ticks[lat_index - 1], y_ticks[lat_index]`` and
        ``x_ticks[lon_index - 1], x_ticks[lon_index]``.  Positions outside the
        grid are assigned to the nearest edge cell.
    """
    if x_ticks is None:
        x_ticks = x_s
    if y_ticks is None:
        y_ticks = y_s

    lat_index = _upper_tick_index(y_ticks, lat)
    lon_index = _upper_tick_index(x_ticks, long)
    return (lat_index, lon_index)

In [221]:
# given list of vertices as tuple pairs,
# return lat/lon location of center of cell
def get_center(grid_vertices):
    """Return the ``(lat, lon)`` midpoint of a cell given its vertex pairs.

    The latitude is the mean of the second components of vertices 1 and 2;
    the longitude is the mean of the first components of vertices 0 and 1.
    """
    first, second, third = grid_vertices[0], grid_vertices[1], grid_vertices[2]
    mid_lon = (first[0] + second[0]) / 2
    mid_lat = (second[1] + third[1]) / 2
    return (mid_lat, mid_lon)

In [222]:
# given a ppm value, and the lat/lon coordinates
# for a location within a cell,
# return array of randomly placed rats within cell
# (number of rats based on ppm value)
def random_rats(ppm, lat, long):
    """Return randomly placed instance coordinates inside the cell containing (lat, long).

    The cell is looked up with ``find_gridbox``; its bounds are
    ``y_s[lat_index - 1], y_s[lat_index]`` and
    ``x_s[lon_index - 1], x_s[lon_index]``.

    Parameters
    ----------
    ppm : number
        Reading that determines how many instances are generated:
        ``int(sqrt(|ppm|))`` latitudes and as many longitudes.
    lat, long : float
        A position inside the target cell.

    Returns
    -------
    tuple of list
        ``(lats, lons)``, two equally long lists of coordinates drawn
        independently and uniformly at random between the cell bounds.
    """
    lat_index, lon_index = find_gridbox(lat, long)

    # abs() mirrors uniform_rats and keeps math.sqrt from raising on negatives
    count = int(math.sqrt(abs(ppm)))

    lat_lo, lat_hi = y_s[lat_index - 1], y_s[lat_index]
    lon_lo, lon_hi = x_s[lon_index - 1], x_s[lon_index]

    # random.sample/range only work on integers; the bounds are floats, so
    # draw each coordinate with random.uniform instead
    lats = [random.uniform(lat_lo, lat_hi) for _ in range(count)]
    lons = [random.uniform(lon_lo, lon_hi) for _ in range(count)]
    return (lats, lons)

In [223]:
# given a ppm value, and the lat/lon coordinates
# for a location within a cell,
# return array of uniformly placed rats within cell
# (number of rats based on ppm value)
def uniform_rats(ppm, lat, long):
    """Return evenly spaced instance coordinates for the cell found by ``find_gridbox``.

    ``int(sqrt(|ppm|))`` values are generated with ``np.linspace`` between
    ``y_s[lat_index - 1]`` and ``y_s[lat_index]`` (latitudes) and between
    ``x_s[lon_index - 1]`` and ``x_s[lon_index]`` (longitudes).

    Returns a ``(lats, lons)`` tuple of two arrays of equal length.
    """
    row, col = find_gridbox(lat, long)
    count = int(math.sqrt(abs(ppm)))

    lat_lo, lat_hi = y_s[row - 1], y_s[row]
    lon_lo, lon_hi = x_s[col - 1], x_s[col]

    return (
        np.linspace(lat_lo, lat_hi, num=count),
        np.linspace(lon_lo, lon_hi, num=count),
    )

In [224]:
# Preview of boston_preds.csv loaded as a DataFrame.
# The displayed output shows the columns "Unnamed: 0.1", "Unnamed: 0",
# "Grids" (a string holding the cell's vertex tuples) and "ppm*100".

DF = pd.read_csv('boston_preds.csv')
DF

Unnamed: 0.1,Unnamed: 0,Grids,ppm*100
0,0,"[(-71.201970000000003, 42.291440999999999), (-...",0.834596
1,1,"[(-71.197266400000004, 42.291440999999999), (-...",1.337284
2,2,"[(-71.192562800000005, 42.291440999999999), (-...",1.742284
3,3,"[(-71.187859200000005, 42.291440999999999), (-...",1.803900
4,4,"[(-71.183155600000006, 42.291440999999999), (-...",1.780794
5,5,"[(-71.178452000000007, 42.291440999999999), (-...",1.773092
6,6,"[(-71.173748400000008, 42.291440999999999), (-...",1.780794
7,7,"[(-71.169044800000009, 42.291440999999999), (-...",1.749986
8,8,"[(-71.16434120000001, 42.291440999999999), (-7...",1.764796
9,9,"[(-71.159637599999996, 42.291440999999999), (-...",1.793556


In [225]:
# function to parse string from "Grids" column in boston_preds.csv
# returns list of grid vertices as float64 tuples
def parse_str(str_edit):
    """Parse a "Grids" cell string into four ``(x, y)`` vertex tuples.

    Brackets, parentheses and commas are all treated as separators, so the
    numbers are recovered regardless of how much whitespace surrounds them
    (``"(1,2)"`` and ``"(1,  2)"`` both parse as ``1`` and ``2``).

    Parameters
    ----------
    str_edit : str
        Text such as ``"[(x0, y0), (x1, y1), (x2, y2), (x3, y3)]"``.

    Returns
    -------
    list of tuple
        The first four vertices as ``(np.float64, np.float64)`` pairs.

    Raises
    ------
    ValueError
        If fewer than eight numbers are present or a token is not a number.
    """
    # map every delimiter to a space so adjacent numbers can never fuse
    separators = str.maketrans({'[': ' ', ']': ' ', '(': ' ', ')': ' ', ',': ' '})
    tokens = str_edit.translate(separators).split()

    if len(tokens) < 8:
        raise ValueError(
            "expected at least 8 numbers (4 vertices), found %d in %r"
            % (len(tokens), str_edit)
        )

    # only the first four vertices are used
    values = [np.float64(token) for token in tokens[:8]]
    return [(values[i], values[i + 1]) for i in range(0, 8, 2)]

In [None]:
# MAIN CODE
# EDIT THIS CODE TO READ DATA AND WRITE FILE

# read_predictions_uniform accepts only the filename; the placement strategy
# is implied by the function itself, so no method string is passed
POINTS = read_predictions_uniform('boston_preds.csv')
geojson = generate_geojson_from_pts(POINTS)
with open('../gh-pages/TRiCAM_BostonAQ/data_test.geojson', 'w') as fp:
    json.dump(geojson, fp)