In [1]:
# Import python packages
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
import theano as T
import theano.tensor as tt
import seaborn as sns
import matplotlib as mp
import numpy.random as npr
import matplotlib.patches as mpatches
from scipy.interpolate import interp1d
import datetime as dt
from scipy.stats.kde import gaussian_kde
import matplotlib.path as mpath
import scipy as sp
import sqlite3
import os
import shapefile
import pdb
from progressbar import ProgressBar
# Apply the seaborn dark-grid look to every figure in this notebook
# (alternative that was tried: 'ggplot')
plt.style.use('seaborn-darkgrid')

In [2]:
# Helper function
def indexall(L):
    """Return the unique values of ``L`` and the index of every element among them.

    Parameters
    ----------
    L : iterable
        Values to index. Any iterable is accepted; it is materialised once,
        so one-shot iterators/generators work as well.

    Returns
    -------
    poo : list
        Unique values of ``L`` in order of first appearance.
    Ix : numpy.ndarray
        For each element of ``L``, its position in ``poo``
        (so ``[poo[i] for i in Ix]`` reproduces ``L``).
    """
    # Materialise once: the input is walked twice below
    items = list(L)
    try:
        # Hash-based lookup keeps this linear in len(L) instead of quadratic
        first_pos = {}
        for item in items:
            first_pos.setdefault(item, len(first_pos))
        poo = list(first_pos)
        Ix = [first_pos[item] for item in items]
    except TypeError:
        # Unhashable elements (e.g. lists): fall back to a linear scan per item
        poo = []
        for item in items:
            if item not in poo:
                poo.append(item)
        Ix = [poo.index(item) for item in items]
    return poo, np.array(Ix)

def subindexall(short,long):
    """Index the ``short`` labels that accompany the first occurrence of each ``long`` label.

    ``short`` and ``long`` are walked in parallel. Each time a value of
    ``long`` is seen for the first time, the paired ``short`` value is kept.
    The kept values are then passed to ``indexall``, whose result
    (unique values, index array) is returned.
    """
    seen_long = []
    kept_short = []
    for short_val, long_val in zip(short, long):
        if long_val in seen_long:
            continue
        seen_long.append(long_val)
        kept_short.append(short_val)
    return indexall(kept_short)

def match(a, b):
    """For each element of ``a`` give its first position in ``b``, or None if absent."""
    positions = []
    for x in a:
        positions.append(b.index(x) if x in b else None)
    return positions


def grep(s, l):
    """Return, as a numpy array, the elements of ``l`` that contain ``s``."""
    hits = [i for i in l if s in i]
    return np.array(hits)

# Function to standardize covariates
def stdize(x):
    """Centre ``x`` on its mean and scale it by two standard deviations."""
    centre = np.mean(x)
    two_sd = np.std(x) * 2
    return (x - centre) / two_sd

def invlogit(x):
    """Inverse-logit (logistic) transform: ``exp(x) / (1 + exp(x))``.

    Parameters
    ----------
    x : scalar or array-like
        Value(s) on the logit scale.

    Returns
    -------
    Value(s) in [0, 1], with the same shape as ``x``.

    Notes
    -----
    The naive formula overflows for large positive ``x`` (``exp(x)`` becomes
    ``inf`` and the ratio ``inf/inf`` is ``nan``). Here the equivalent
    ``exp(-log(1 + exp(-x)))`` is evaluated with ``np.logaddexp``, which does
    not overflow and stays accurate in both tails.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    log_denominator = np.logaddexp(0, np.multiply(x, -1.0))
    return np.exp(-log_denominator)

In [3]:
from shapely.geometry import *

In [4]:
# Import new set data
# (the commented-out line below is an earlier Excel source, kept for reference)
#ndata = pd.read_excel('Local_Pop_Diff_2010_2015.xlsx')
# NOTE(review): the cell output ends with the tail of an IPython warning; this may be
# pandas' mixed-dtype warning for this file - confirm and, if so, pass explicit dtypes.
setdata = pd.read_csv('maxnfunctionalgroups_&reefcovariates_2019-06-24.csv')


  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
# Number of rows (observations) in the set data
nobs = setdata.shape[0]

In [6]:
# List the column names available in the set data
setdata.columns

Index(['region_name', 'region_id', 'location_name', 'location_code',
       'location_id', 'site_name', 'site_code', 'site_id', 'reef_name',
       'reef_type', 'reef_code', 'reef_id', 'trip_year', 'trip_code', 'set_id',
       'set_code', 'set_date', 'set_latitude', 'set_longitude', 'maxn',
       'functional_group', 'depth', 'drop_time', 'haul_time', 'bait',
       'bait_oiled', 'current_flow_estimated', 'current_flow_instrumented',
       'visibility', 'field_of_view', 'substrate_relief_sd',
       'substrate_relief_mean', 'equipment_frame_type', 'equipment_camera',
       'equipment_stereo_camera', 'equipment_camera_height',
       'equipment_arm_length', 'ascidians', 'bleached_corals', 'bryozoa',
       'consolidated', 'crinoids', 'halimeda', 'hard_coral', 'hydrocoral',
       'hydroids', 'invertebrate_complex', 'macroalgae', 'mangrove',
       'seagrass', 'soft_coral', 'sponge', 'true_anemones', 'unconsolidated',
       'zoanthids', 'video_length_watched', 'video_filename', 'vide

In [7]:
# Build a 'latitude_longitude' string key for every row, so that rows
# sharing the same coordinates share the same key
setdata['LatLon'] = np.array([
    '_'.join((str(t), str(n)))
    for t, n in zip(setdata.set_latitude.values, setdata.set_longitude.values)
])

# Unique keys (first-appearance order) and, per row, the index of its key
LatLongs, Ill = indexall(setdata['LatLon'].values)
# Number of unique coordinate pairs
nurecs = len(LatLongs)

In [8]:
# Number of unique lat/lon keys (same value as nurecs)
len(LatLongs)

18658

Import the Global Gravity of Coral Reefs shapefile

In [9]:
# Open the gravity shapefile with the pyshp reader
sf = shapefile.Reader('Global_Gravity_of_Coral_Reefs_2_0.shp')

In [10]:
# Show the name the reader reports for the opened shapefile
sf.shapeName

'Global_Gravity_of_Coral_Reefs_2_0'

In [11]:
# Report how many shapes the reader returns
print('number of shapes imported:', len(sf.shapes()))

number of shapes imported: 27156


In [12]:
# Grab polygons: keep all shapes from the reader for the point-matching cells further down
shapes = sf.shapes()

In [13]:
# Grab data fields (attribute definitions of the shapefile) and display them
fields = sf.fields
fields

[('DeletionFlag', 'C', 1, 0),
 ['reef_ID', 'N', 8, 0],
 ['TT_pop', 'N', 19, 12],
 ['TT_market', 'N', 19, 12],
 ['Grav_NC', 'N', 19, 10],
 ['Grav_citie', 'N', 19, 10],
 ['Grav_tot', 'N', 19, 10],
 ['Grav_NP', 'N', 19, 11]]

`Grav_NP` is the gravity metric that was used in the bright spots paper.

In [14]:
# Grab data: the attribute records that accompany the shapes
records = sf.records()

In [15]:
# Number of attribute records read from the shapefile
nshapes = len(records)

In [16]:
from shapely.geometry import shape, Point

In [17]:
# Number of shapes held in memory
len(shapes)

27156

In [18]:
# Convert every shapefile shape into a shapely geometry, so that
# contains() and distance() can be called on it in the cells below
shpx = np.array([shape(s) for s in shapes])

In [19]:
# One-off check
# Test point; presumably given as (longitude, latitude), the order used by the main loop
ptx = Point([177.32109,28.19964])
# Flag every polygon that contains the test point
sflag = [s.contains(ptx) for s in shpx]
# Number of containing polygons (not echoed: only the cell's last expression is displayed)
sum(sflag)
# Distance from the test point to every polygon
poot = [ptx.distance(s) for s in shpx]
# Attribute record of the closest polygon, displayed as the cell output
rext = records[poot.index(min(poot))]
rext

Record #3874: [3881, 2858.0, 4744.0, 0.000910887, None, 0.0, None]

In [20]:
# Split each unique 'lat_lon' key back into its two numeric parts
coords = np.array([key.split('_') for key in LatLongs]).astype(float)
# First column holds latitudes, second column longitudes
lat, lon = coords.T

In [21]:
# Spot check: first unique 'lat_lon' key
LatLongs[0]

'25.000496_-80.38673'

In [22]:
# Spot check: latitude parsed from the first key
lat[0]

25.000496

In [23]:
# Allocate one gravity value per unique location, pre-filled with the
# placeholder -999 until the matching loop overwrites it
Grav_NC = np.full(nurecs, -999.0)
Grav_Total = np.full(nurecs, -999.0)
Grav_Cities = np.full(nurecs, -999.0)
Grav_NP = np.full(nurecs, -999.0)

In [24]:
pbar = ProgressBar()
nrex = list(np.arange(nurecs))

# Positions of the gravity attributes inside each shapefile record.
# The records hold one value fewer than sf.fields: the leading
# 'DeletionFlag' entry of the field list has no value in a record.
IX_GRAV_NC = 3
IX_GRAV_CITIES = 4
IX_GRAV_TOTAL = 5
IX_GRAV_NP = 6  # Bright spots

# For every unique location, copy the gravity values of the polygon that
# contains it; if no polygon contains it, use the nearest polygon instead.
for j in pbar(nrex):
    # Grab point - order is longitude, latitude
    pt = Point([lon[j], lat[j]])
    # Index of the first polygon containing the point (None if there is none).
    # The scan stops at the first hit instead of testing every polygon.
    hit = next((k for k, s in enumerate(shpx) if s.contains(pt)), None)
    if hit is None:
        # Outside all polygons: fall back to the nearest polygon
        dists = [pt.distance(s) for s in shpx]
        hit = dists.index(min(dists))
    rex = records[hit]
    # Missing attribute values (None in the record) end up as NaN in these
    # float arrays; they are inspected in the cells that follow.
    Grav_NC[j] = rex[IX_GRAV_NC]
    Grav_Total[j] = rex[IX_GRAV_TOTAL]
    Grav_Cities[j] = rex[IX_GRAV_CITIES]
    Grav_NP[j] = rex[IX_GRAV_NP]

100% (18658 of 18658) |##################| Elapsed Time: 0:43:40 Time:  0:43:40


In [25]:
# Find any NA's: list the lat/lon keys whose Grav_NP value is NaN
np.array(LatLongs)[np.isnan(Grav_NP)]

array(['23.876739999999998_166.29485', '23.88006_166.28557',
       '23.88097_166.2751', '23.88366_166.26608000000002',
       '23.88206_166.25533000000001', '23.881829999999997_166.24498',
       '23.87903_166.2905', '23.88026_166.28011',
       '23.88257_166.27066000000002', '23.88203_166.26077',
       '23.88436_166.25079', '23.8829_166.23907', '23.88426_166.23387',
       '23.88126_166.22852', '23.88024_166.22249', '23.88_166.21561',
       '23.87518_166.21112', '23.87236_166.2041', '23.65356_166.0876',
       '23.6432_166.08856', '23.636210000000002_166.09506000000002',
       '23.63384_166.1058', '23.62479_166.11061', '23.62272_166.12115',
       '23.64904_166.08446', '23.64031_166.09317', '23.63264_166.09874',
       '23.62934_166.10907', '23.623070000000002_166.11575',
       '23.62138_166.12677', '23.62539_166.14331', '23.62988_166.15225',
       '23.63298_166.16298', '23.63821_166.17201', '23.63615_166.18051',
       '23.63455_166.19196000000002', '28.27791_177.38455',
      

In [26]:
# Tmp fix for these sites: replace missing (NaN) Grav_NP values with 0
# NOTE(review): after this, a filled-in 0 cannot be told apart from a genuine 0 - confirm this is intended
Grav_NP[np.isnan(Grav_NP)] = 0

In [27]:
# Find any NA's: list the lat/lon keys whose Grav_NC value is NaN
np.array(LatLongs)[np.isnan(Grav_NC)]

array([], dtype='<U39')

In [28]:
# Find any NA's: list the lat/lon keys whose Grav_Total value is NaN
np.array(LatLongs)[np.isnan(Grav_Total)]

array([], dtype='<U39')

In [29]:
# Find any NA's: list the lat/lon keys whose Grav_Cities value is NaN
np.array(LatLongs)[np.isnan(Grav_Cities)]

array(['-17.24876_119.35699', '-17.24798_119.35536',
       '-17.25281_119.35916', ..., '5.86677_-162.12193',
       '5.89801_-162.12832', '5.89674_-162.11684'], dtype='<U39')

In [30]:
# Tmp fix for these sites: replace missing (NaN) Grav_Cities values with 0
# NOTE(review): after this, a filled-in 0 cannot be told apart from a genuine 0 - confirm this is intended
Grav_Cities[np.isnan(Grav_Cities)] = 0

In [31]:
# Expand the per-location gravity values back to one value per row
# (Ill maps each row to its unique location) and store them as new columns
for column_name, per_location in (
    ('Grav_NC', Grav_NC),
    ('Grav_Total', Grav_Total),
    ('Grav_Cities', Grav_Cities),
    ('Grav_NP', Grav_NP),
):
    setdata[column_name] = per_location[Ill]

In [32]:
# Save the set data, now including the gravity columns, without the row index
setdata.to_csv('FinPrint_Set_Data.csv', index=False)