# Obtaining random cell samples
In this exercise we will obtain a random sample of cells together with their associated environmental covariates.

In [1]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline
import sys
# Make the packages under /apps importable.
# NOTE(review): presumably this is where the project packages imported below
# (drivers, traversals, ...) live — confirm for your deployment.
sys.path.append('/apps')
import django
# Initialise Django before the project modules below are imported.
# NOTE(review): presumably relies on DJANGO_SETTINGS_MODULE being set in the
# kernel environment — confirm.
django.setup()
from drivers.tree_builder import TreeNeo
from drivers.graph_models import TreeNode, Order, Family, graph,Kingdom,Occurrence
from drivers.graph_models import Cell,Mex4km, countObjectsOf
from drivers.graph_models import pickNode
import matplotlib.pyplot as plt
import pandas as pd
import itertools as it
import numpy as np

## Use the ggplot style
plt.style.use('ggplot')

2. Selecting the space of considered cells.
For this case we will consider all the Mexican Territory. We can load the Mexican polygon from the database with the following commands.

> Skipping this spatial filter will most likely retrieve every registered cell, which in some cases can hang the system.

In [2]:
from sketches.models import Country
from mesh.models import MexMesh
from traversals.sampling import UniformRandomCellSample

# Look the country up by a substring of its name. A single `get` with the
# lookup runs the same query as filter(...).get(): it still raises when zero or
# several countries match "exico".
Mexico = Country.objects.get(name__contains="exico")
# Keep only the mesh cells whose geometry intersects the country polygon.
mexican_cells = MexMesh.objects.filter(cell__intersects=Mexico.geom)

In [3]:
# Primary keys of every mesh cell inside the study area
# (`values('pk')` yields one {'pk': ...} dict per cell).
ids = list(mexican_cells.values('pk'))
CellNodeClass = Mex4km
list_of_cell_ids = ids
sample_size = 300
# Seeded sampler over the candidate ids; the fixed seed keeps the draw repeatable.
selection_of_cells = UniformRandomCellSample(
    list_of_cell_ids,
    CellNodeClass,
    sample_size=sample_size,
    random_seed=12345,
    with_replacement=True,
)

INFO Using custom random seed of: 12345
INFO Compiling Query and asking the Graph Database


The size of 'ids' is {{n}}
3. Generate the trees for each cell.


In [4]:
# Materialise the sample into a list so that the same cells can be reused by
# the cells below (tree building, covariate extraction, centroids).
%time cells = list(selection_of_cells)

CPU times: user 1.93 s, sys: 96 ms, total: 2.03 s
Wall time: 5.4 s


3.1 We will use the function *buildTreeNeo* to extract the taxonomic tree found in each sampled cell.

In [5]:
from traversals import strategies as st
%time trees = map(lambda cell : st.buildTreeNeo(cell),cells)

CPU times: user 42.5 s, sys: 1.41 s, total: 43.9 s
Wall time: 47.4 s


## Union of trees

In [6]:
%time ocs = reduce(lambda a,b : a + b ,map(lambda t : t.occurrences, trees))

CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 11.1 ms


In [7]:
## Build a single tree from the pooled occurrences of all the sampled cells.
## Ohh! super fast (compared to the other method that takes more than 20 minutes for this sample size)
%time bigtree = TreeNeo(ocs,cell_objects=cells)

CPU times: user 408 ms, sys: 12 ms, total: 420 ms
Wall time: 421 ms


In [8]:
## Extract the root (if any!)
# NOTE: despite its name, `root` is the Plantae kingdom node of the merged tree
# (see its repr in the following output).
# NOTE(review): "(if any!)" suggests this lookup can fail when the sample holds
# no Plantae occurrences — confirm how `to_Plantae` behaves in that case.
root = bigtree.to_Plantae

In [9]:
root

<LocalTree | Kingdom: Plantae - n.count : 1700- | AF: 0.05 >

In [10]:
## Get presences absences
def PresenceAbsence(node,list_of_trees):
    """Build a presence/absence indicator of `node` over a list of trees.

    Parameters
    ----------
    node : object
        Node forwarded to each tree's ``hasNode`` method.
    list_of_trees : iterable
        Objects exposing ``hasNode(node)``.

    Returns
    -------
    pandas.DataFrame
        One row per tree, in input order, with a single integer column 'Y'
        holding the result of ``hasNode`` cast to int (1 present, 0 absent).
    """
    flags = [tree.hasNode(node) for tree in list_of_trees]
    presence_frame = pd.DataFrame({'Y' : flags})
    return presence_frame.astype('int')

    

In [11]:
# One row per tree in `trees`: Y is 1 when the tree contains the `root` node, 0 otherwise.
pres = PresenceAbsence(root,trees)

In [12]:
# Number of sampled cells where the node is present.
# Vectorised column sum (one entry per column, here only 'Y') instead of the
# builtin sum, which loops over the rows in Python; it also returns an array
# (not the int 0) when the frame is empty.
pres.values.sum(axis=0)

array([112])

In [13]:

# Names of the environmental layers to extract for every sampled cell; passed
# as `vars=` to st.getEnvironmentalCovariatesFromListOfCells.
# NOTE: the name shadows the builtin `vars`; it is kept because other cells
# refer to it.
vars = [
    'Elevation',
    'MaxTemperature',
    'MeanTemperature',
    'MinTemperature',
    'Precipitation',
    'Vapor',
    'SolarRadiation',
    'WindSpeed',
    'WorldPopLatam2010',
    'DistanceToRoadMex',
]

## Get environmental Covariates

In [14]:
from traversals import strategies as st

# Fetch the covariates listed in `vars` for every sampled cell.
# In the recorded run the wall time (1min 42s) is far above the CPU time
# (6.68 s), so most of it is spent waiting rather than computing
# (presumably on database queries — confirm).
%time data = st.getEnvironmentalCovariatesFromListOfCells(cells,vars=vars)

CPU times: user 6.58 s, sys: 108 ms, total: 6.68 s
Wall time: 1min 42s


### Concatenate two dataframes 
> Build some utility functions for automating these tasks

In [26]:
data

Unnamed: 0,DistanceToRoadMex_mean,Elevation_mean,MaxTemperature_mean,MeanTemperature_mean,MinTemperature_mean,Precipitation_mean,SolarRadiation_mean,Vapor_mean,WindSpeed_mean,WorldPopLatam2010_mean,Longitude,Latitude
0,3868.661519,63.222222,30.225231,20.7662,30.3171,7.560185,18867.312500,1.75,2.26389,2.71775,-113.380563,27.54339
1,1379.938936,3.888889,27.693981,23.0694,27.8079,58.791667,16915.250000,2.21296,4.41667,3.72007,-97.406563,25.68139
2,9728.652812,2353.000000,21.844722,13.7833,21.8806,47.480556,18051.152778,1,3.11389,3.48854,-103.629563,22.83939
3,2510.775420,1305.777778,27.772222,20.794,27.831,85.865741,18112.976852,1.30093,2.18519,2.5483,-107.255563,25.97539
4,4264.779688,1212.777778,26.811343,N.A.,N.A.,19.287037,19742.972222,N.A.,N.A.,N.A.,-107.500563,31.80639
5,9601.187188,283.444444,30.418518,23.6551,30.5046,57.622685,17391.502315,1.97454,2.2338,1.68191,-98.974563,24.35839
6,4325.744238,59.555556,29.541945,23.2556,29.4611,55.769444,17422.047222,2.33333,2.90833,2.4007,-97.994563,24.16239
7,7029.830352,108.555556,32.506019,24.4306,32.4977,48.060185,18315.770833,1.5,2.08333,2.79323,-108.676563,26.46539
8,1235.716885,2240.333333,22.721111,13.3722,22.7694,43.850000,18295.166667,0.513889,3.53333,1.03842,-106.667563,29.06239
9,1906.436431,174.111111,28.574537,21.2847,28.6412,8.601852,19137.398148,1.6088,3,1.01662,-114.409563,29.69939


In [15]:
# Column-wise concat: rows are matched on the index, not on position.
# NOTE(review): assumes `pres` and `data` share the same index and follow the
# order of `cells` — confirm before trusting the row pairing.
training_data = pd.concat([pres,data],axis=1)

## Extract vector information (To be implemented soon!)


In [16]:
# Centroid of every sampled cell. A list comprehension is eager on both
# Python 2 and Python 3, so `points` can be indexed and iterated repeatedly
# (a Python 3 `map` object supports neither).
points = [cell.centroid for cell in cells]

In [17]:
from ecoregions.models import TerrEcoregions,InegiIV
# Ecoregion (id, name) for each centroid, as one row per point.
# Placeholder row used when no ecoregion polygon intersects a point.
no_ecoregion = (np.nan,np.nan)
ecoregions = [TerrEcoregions.objects.filter(geom__intersects=point) for point in points]
ecovalues = [matches.values_list('wwf_mhtnum','wwf_mhtnam') for matches in ecoregions]
# `.first()` needs a single query per point (the previous exists() + get() pair
# needed two) and returns None when nothing matches. It also no longer raises
# when a point touches several polygons: the match with the lowest primary key
# is taken, since `.first()` orders an unordered queryset by pk.
ecovals = [row.first() or no_ecoregion for row in ecovalues]
# Naming the columns in the constructor also works when the sample is empty.
vegdat = pd.DataFrame(ecovals,columns=['vegid','vegname'])

In [20]:
vegdat

Unnamed: 0,vegid,vegname
0,13,Deserts and Xeric Shrublands
1,7,"Tropical and Subtropical Grasslands, Savannas ..."
2,3,Tropical and Subtropical Coniferous Forests
3,3,Tropical and Subtropical Coniferous Forests
4,13,Deserts and Xeric Shrublands
5,13,Deserts and Xeric Shrublands
6,13,Deserts and Xeric Shrublands
7,2,Tropical and Subtropical Dry Broadleaf Forests
8,13,Deserts and Xeric Shrublands
9,13,Deserts and Xeric Shrublands


In [21]:
# Rebuild the table from its three source frames instead of appending to
# `training_data` itself: the cell is then idempotent (re-running it no longer
# adds a second copy of the vegetation columns) and the result is the same as
# concatenating `vegdat` onto pd.concat([pres,data],axis=1).
training_data = pd.concat([pres,data,vegdat],axis=1)

## Let's save this for the moment

In [22]:
#training_data.to_csv("/outputs/root_train_with_pop_lc.csv")

In [25]:
# Preview the first five rows of the assembled training table.
training_data.head()

Unnamed: 0,Y,DistanceToRoadMex_mean,Elevation_mean,MaxTemperature_mean,MeanTemperature_mean,MinTemperature_mean,Precipitation_mean,SolarRadiation_mean,Vapor_mean,WindSpeed_mean,WorldPopLatam2010_mean,Longitude,Latitude,vegid,vegname
0,0,3868.661519,63.222222,30.225231,20.7662,30.3171,7.560185,18867.3125,1.75,2.26389,2.71775,-113.380563,27.54339,13,Deserts and Xeric Shrublands
1,0,1379.938936,3.888889,27.693981,23.0694,27.8079,58.791667,16915.25,2.21296,4.41667,3.72007,-97.406563,25.68139,7,"Tropical and Subtropical Grasslands, Savannas ..."
2,1,9728.652812,2353.0,21.844722,13.7833,21.8806,47.480556,18051.152778,1,3.11389,3.48854,-103.629563,22.83939,3,Tropical and Subtropical Coniferous Forests
3,0,2510.77542,1305.777778,27.772222,20.794,27.831,85.865741,18112.976852,1.30093,2.18519,2.5483,-107.255563,25.97539,3,Tropical and Subtropical Coniferous Forests
4,0,4264.779688,1212.777778,26.811343,N.A.,N.A.,19.287037,19742.972222,N.A.,N.A.,N.A.,-107.500563,31.80639,13,Deserts and Xeric Shrublands


In [27]:
## Save it / store it:
# Writes to an absolute path.
# NOTE(review): assumes /outputs exists and is writable in the runtime
# environment — confirm.
# With pandas' default `index=True` the row index is written as the first,
# unnamed CSV column.
training_data.to_csv("/outputs/training_dataset_complete_with_socioeconomical.csv")


In [24]:
# Centroid of every sampled cell. Built as a real list (not a `map` object) so
# that the later `points[0]` also works on Python 3, where `map` is lazy and
# cannot be indexed.
points = [cell.centroid for cell in cells]

In [25]:
c = cells[0]

In [26]:
c.srid

4326

In [27]:
c.centroid??

Type:        property
String form: <property object at 0x7f85e5005ec0>
Source:     
# c.centroid.fget
@property
def centroid(self):
    pointstr = 'POINT(%s %s)'%(self.longitude,self.latitude)
    point = GEOSGeometry(pointstr,srid=self.srid)
    return point


In [28]:
## Put srid to all centroids
#map(lambda p : p.set_srid(4326),points)

In [29]:
p = points[0]

In [None]:
## Convert to Point geometry
from django.contrib.gis.geos import  GEOSGeometry
def fx(arr):
    """Format the first two items of `arr` as a WKT point string 'POINT(<arr[0]> <arr[1]>)'."""
    return 'POINT(%s %s)'%(arr[0],arr[1])

# NOTE(review): `cc` is not defined anywhere in the visible notebook and this
# cell has no execution count; presumably it is a sequence of coordinate pairs
# (longitude, latitude) — define it before running this cell.
# A list comprehension keeps `points` indexable on Python 3, where `map` is lazy.
points = [GEOSGeometry(fx(pair),srid=4326) for pair in cc]

In [55]:
from django.contrib.gis.db.models.functions import Distance
from django.contrib.gis.measure import D

In [43]:
## Import roads
from sketches.models import MexRoads


In [58]:
# Roads whose geometry is at most D(m=30000), i.e. 30 km, away from the point `p`.
# NOTE(review): `p` is expected to be `points[0]` from an earlier cell; the
# execution counts here are out of order, so confirm it is defined on a fresh run.
q = MexRoads.objects.filter(geom__distance_lte=(p,D(m=30000)))

In [59]:
q

<QuerySet [<MexRoads: <Road Layer instance: 0.0 >>, <MexRoads: <Road Layer instance: 0.0 >>, <MexRoads: <Road Layer instance: 0.0 >>]>

In [52]:
# Show the SQL that Django generates for the queryset. `print(q)` would only
# print the QuerySet repr (the list of matched roads), not the SELECT statement.
print(q.query)

SELECT "public"."mexroads"."gid", "public"."mexroads"."gid", "public"."mexroads"."tnode_", "public"."mexroads"."lpoly_", "public"."mexroads"."rpoly_", "public"."mexroads"."length", "public"."mexroads"."cov_", "public"."mexroads"."cov_id", "public"."mexroads"."geom" FROM "public"."mexroads" WHERE ST_DWithin("public"."mexroads"."geom", ST_GeomFromEWKB('\001\001\000\000 \346\020\000\000\212q\250&[X\\\300cF\020\230\033\213;@'::bytea), 1)


In [None]:
## Let's calculate distance
# NOTE(review): the lines below look like a pasted interactive-session example
# (prompts and printed results included), not runnable notebook code:
# `AustraliaCity` is not defined or imported anywhere in the visible notebook
# and this cell has no execution count. It is kept as a reference for
# annotating a queryset with Distance(); adapt it to the project models before
# running.
>>> from django.contrib.gis.db.models.functions import Distance
>>> pnt = AustraliaCity.objects.get(name='Hobart').point
>>> for city in AustraliaCity.objects.annotate(distance=Distance('point', pnt)):
...     print(city.name, city.distance)
Wollongong 990071.220408 m
Shellharbour 972804.613941 m
Thirroul 1002334.36351 m
...