# Obtaining random cell samples
In this exercise we will obtain a random sample of cells together with their associated environmental covariates.

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import sys
# Make the project packages located under /apps importable from this kernel.
sys.path.append('/apps')
import django
# Initialise Django before the project imports below.
# NOTE(review): presumably the project modules need configured Django settings at import time — confirm.
django.setup()
from drivers.tree_builder import TreeNeo
from drivers.graph_models import TreeNode, Order, Family, graph,Kingdom,Occurrence
from drivers.graph_models import Cell,Mex4km, countObjectsOf
from drivers.graph_models import pickNode
import matplotlib.pyplot as plt
import pandas as pd
import itertools as it
import numpy as np

## Use the ggplot style
plt.style.use('ggplot')

2. Selecting the space of considered cells.
In this case we will consider the entire Mexican territory. We can load the polygon of Mexico from the database with the following commands.

> Failing to restrict the query in this way will likely retrieve all the registered cells, which in some cases could hang the system.

In [2]:
from sketches.models import Country
from mesh.models import MexMesh
from traversals.sampling import UniformRandomCellSample

# Fetch the single Country row whose name contains "exico". `get` raises when
# zero or several rows match, exactly as the filter(...).get() chain does.
Mexico = Country.objects.get(name__contains="exico")
# Keep only the mesh cells whose geometry intersects the country polygon.
mexican_cells = MexMesh.objects.filter(cell__intersects=Mexico.geom)

In [3]:
# Materialise the primary keys of every mesh cell that intersects Mexico.
# NOTE(review): `.values('pk')` yields one {'pk': ...} dict per row rather than
# bare ids — confirm this is the format UniformRandomCellSample expects.
ids = list(mexican_cells.values('pk'))
list_of_cell_ids = ids
CellNodeClass = Mex4km
sample_size = 300
# Draw the sample with a fixed seed, with replacement.
selection_of_cells = UniformRandomCellSample(
    list_of_cell_ids,
    CellNodeClass,
    sample_size=sample_size,
    random_seed=12345,
    with_replacement=True,
)

INFO Using custom random seed of: 12345
INFO Compiling Query and asking the Graph Database


The size of 'ids' is {{n}}
3. Generate the trees for each cell.


In [4]:
# Materialise the random sample into a list so the same cells can be reused by the following steps.
%time cells = list(selection_of_cells)

CPU times: user 1.95 s, sys: 104 ms, total: 2.05 s
Wall time: 15.5 s


1.2 We will use the function *buildTreeNeo* to extract the taxonomic tree in that area.

In [5]:
from traversals import strategies as st
%time trees = map(lambda cell : st.buildTreeNeo(cell),cells)

CPU times: user 41.8 s, sys: 1.27 s, total: 43.1 s
Wall time: 1min 2s


## Union of trees

In [6]:
%time ocs = reduce(lambda a,b : a + b ,map(lambda t : t.occurrences, trees))

CPU times: user 12 ms, sys: 0 ns, total: 12 ms
Wall time: 9.92 ms


In [7]:
## Build a single tree from the pooled occurrences of all the sampled cells.
## Original note: this is very fast compared to the other method, which takes more than 20 minutes for this sample size.
%time bigtree = TreeNeo(ocs,cell_objects=cells)

CPU times: user 408 ms, sys: 0 ns, total: 408 ms
Wall time: 409 ms


In [8]:
## Select the Plantae node of the merged tree (if any!).
## NOTE(review): the variable is called `root`, but the recorded repr reports "Kingdom: Plantae",
## so this looks like a kingdom-level node rather than the tree's actual root — confirm.
root = bigtree.to_Plantae

In [10]:
# Display the repr of the selected node.
root

<LocalTree | Kingdom: Plantae - n.count : 1700- | AF: 0.05 >

In [11]:
## Get presences absences
def PresenceAbsence(node,list_of_trees):
    """Build a presence/absence indicator column for `node`.

    Parameters:
        node : the node handed to each tree's ``hasNode`` method.
        list_of_trees : iterable of objects exposing ``hasNode(node)``.

    Returns:
        A one-column DataFrame named 'Y' with one row per tree, holding the
        ``hasNode`` result cast to int (True -> 1, False -> 0).
    """
    # Ask every tree whether it contains the node, keeping the input order.
    flags = [tree.hasNode(node) for tree in list_of_trees]
    presence_frame = pd.DataFrame({'Y': flags})
    return presence_frame.astype('int')

    

In [12]:
# One row per per-cell tree: 1 if that tree contains the `root` node, 0 otherwise.
pres = PresenceAbsence(root,trees)

In [13]:
# Count the cells in which the node is present. Summing along axis 0 in NumPy
# yields the same one-element array as the builtin sum(), without looping over
# the rows in Python.
pres.values.sum(axis=0)

array([112])

In [15]:

# Names of the environmental layers to request.
# NOTE(review): `vars` shadows the Python builtin of the same name; consider renaming.
# NOTE(review): 'WorldPopLatam2010' is not among the defaults listed in the function's
# recorded signature — confirm that this layer is available.
vars=['Elevation', 'MaxTemperature', 'MeanTemperature',
      'MinTemperature', 'Precipitation', 'Vapor',
      'SolarRadiation', 'WindSpeed','WorldPopLatam2010' ]
# NOTE(review): the return value is not assigned, and the recorded output is the
# function's help text rather than a result — this looks like an inspection
# leftover (possibly run with a trailing `?`); confirm whether it is still needed.
st.getEnvironmentalCovariatesFromListOfCells(cells,vars)

Signature: st.getEnvironmentalCovariatesFromListOfCells(list_of_cells, vars=['Elevation', 'MaxTemperature', 'MeanTemperature', 'MinTemperature', 'Precipitation', 'Vapor', 'SolarRadiation', 'WindSpeed'], with_coordinates=True)
Docstring:
Parameters :
    vars (list) name of the environmental layers. By default select all layers.

Returns:
     a Dataframe of the summary statistics of the raster covariates defined in the cell's border (polygon).
File:      /apps/traversals/strategies.py
Type:      function


## Get environmental Covariates

In [14]:
from traversals import strategies as st

# Extract the covariates for every sampled cell, using the function's default layer list.
# This is the slowest step recorded in the notebook (see the wall time in its output).
%time data = st.getEnvironmentalCovariatesFromListOfCells(cells)

CPU times: user 5.52 s, sys: 76 ms, total: 5.6 s
Wall time: 2min 24s


### Concatenate two dataframes 
> Build some utility functions for automating these tasks

In [31]:
# Put the presence/absence column next to the covariates (column-wise concatenation).
# NOTE(review): axis=1 aligns rows on the index, so this assumes `pres` and `data`
# share the same index with one row per sampled cell — TODO confirm.
training_data = pd.concat([pres,data],axis=1)

## Let's save this for the moment

In [32]:
# Persist the training table. With to_csv's defaults the index is written as well, as an unnamed leading column.
# NOTE(review): hard-coded absolute output path; the /outputs directory must already exist.
training_data.to_csv("/outputs/root_train.csv")

In [33]:
# Preview only the first rows instead of rendering the whole table.
training_data.head(10)

Unnamed: 0,Y,Elevation_mean,MaxTemperature_mean,MeanTemperature_mean,MinTemperature_mean,Precipitation_mean,SolarRadiation_mean,Vapor_mean,WindSpeed_mean,Longitude,Latitude
0,0,471.333,27.9719,21.4889,28.0361,41.3222,16507.3,1.5,3.4,-100.248563,26.612390
1,0,63.2222,30.2252,20.7662,30.3171,7.56019,18867.3,1.75,2.26389,-113.380563,27.543390
2,0,28.4444,32.2956,25.8333,32.3287,98.7153,18081.7,2.5,2.33333,-88.684563,20.585390
3,0,3.88889,27.694,23.0694,27.8079,58.7917,16915.2,2.21296,4.41667,-97.406563,25.681390
4,1,2353,21.8447,13.7833,21.8806,47.4806,18051.2,1,3.11389,-103.629563,22.839390
5,1,1567.78,26.0403,17.1597,26.0972,35.037,18133.3,1.13889,2.79861,-106.177563,28.768390
6,0,1305.78,27.7722,20.794,27.831,85.8657,18113,1.30093,2.18519,-107.255563,25.975390
7,0,1118.22,26.3725,18,26.4306,18.1319,19512.6,1.08333,3.31389,-106.373563,31.708390
8,0,2079.22,23.4965,13.4931,23.5417,38.8519,18373.9,0.583333,3.08333,-107.353563,28.523390
9,1,1727.67,26.1475,17.5324,26.1968,40.1852,17223.4,1.19444,2.48843,-105.050563,25.828390
