# Obtaining random cell samples
In this exercise we will obtain a random sample of cells to predict the presence of reptiles.
First, load the libraries.


In [3]:
%matplotlib inline
import sys
sys.path.append('/apps')
import django
#django.setup()
from drivers.tree_builder import TreeNeo
from drivers.graph_models import TreeNode, Order, Family, graph,Kingdom,Occurrence
from drivers.graph_models import Cell,Mex4km, countObjectsOf
from drivers.graph_models import pickNode
import matplotlib.pyplot as plt
import pandas as pd
import itertools as it
import numpy as np

## Use the ggplot style sheet for the matplotlib figures drawn in this notebook
plt.style.use('ggplot')

2. Selecting the space of considered cells.
For this case we will consider all the Mexican Territory. We can load the Mexican polygon from the database with the following commands.

> Failing to restrict the query to this polygon will likely fetch every registered cell, which in some cases could hang the system.

In [2]:
from sketches.models import Country
from mesh.models import MexMesh
from traversals.sampling import UniformRandomCellSample

# Fetch the one country whose name contains "exico"; like the original
# filter(...).get(), this raises if zero or several countries match.
Mexico = Country.objects.get(name__contains="exico")

# Lazy queryset of the mesh cells whose geometry intersects the country polygon.
mexican_cells = MexMesh.objects.filter(
    cell__intersects=Mexico.geom
)

In [4]:
# Primary keys of every mesh cell that intersects Mexico.
# NOTE(review): `.values('pk')` yields one {'pk': ...} dict per cell rather than
# bare ids — confirm this is the shape UniformRandomCellSample expects.
ids = list(mexican_cells.values('pk'))

# Sampling configuration: which graph node class to sample, from which ids,
# and how many cells to draw.
CellNodeClass = Mex4km
list_of_cell_ids = ids
sample_size = 601

# Draw the sample with replacement under a fixed seed (presumably so the same
# cells are selected on every run — the sampler logs the seed it uses).
selection_of_cells = UniformRandomCellSample(
    list_of_cell_ids,
    CellNodeClass,
    sample_size=sample_size,
    random_seed=12345,
    with_replacement=True
)

INFO Using custom random seed of: 12345
INFO Compiling Query and asking the Graph Database


The size of `ids` is {{len(ids)}}
3. Generate the trees for each cell.


In [5]:
%time cells = list(selection_of_cells)

CPU times: user 4.2 s, sys: 164 ms, total: 4.37 s
Wall time: 31.2 s


3.1 We will use the function *buildTreeNeo* to extract the taxonomic tree in the area of each sampled cell.

In [6]:
from traversals import strategies as st
%time trees = map(lambda cell : st.buildTreeNeo(cell),cells)

CPU times: user 1min 41s, sys: 2.9 s, total: 1min 44s
Wall time: 2min 55s


## Union of trees

In [7]:
%time ocs = reduce(lambda a,b : a + b ,map(lambda t : t.occurrences, trees))

CPU times: user 148 ms, sys: 0 ns, total: 148 ms
Wall time: 147 ms


In [8]:
## Ohh! super fast (compared to the other method that takes more than 20 minutes for this sample size)
## Build one merged tree from the pooled occurrences, handing over the sampled cells as well.
%time bigtree = TreeNeo(ocs,cell_objects=cells)

CPU times: user 892 ms, sys: 28 ms, total: 920 ms
Wall time: 904 ms


In [9]:
## Extract the reptiles (if any!)
## Walk down the merged tree: Animalia -> Chordata -> Reptilia.
## NOTE(review): it is not visible from here what happens when the sample holds no
## reptiles (the attribute lookup may raise) — confirm before relying on "if any".
reptiles = bigtree.to_Animalia.to_Chordata.to_Reptilia

In [10]:
## Get presences absences
# One row per sampled cell, in the same order as `trees`; Y is the result of
# `tree.hasNode(reptiles)` cast to int (1 presumably meaning "reptiles present").
# A list comprehension is used instead of `map`, because on Python 3 `map`
# returns an iterator, which pandas does not accept as a column of values.
pres_reps = pd.DataFrame({
    'Y': [tree.hasNode(reptiles) for tree in trees]
}).astype('int')



In [13]:
# Preview only the first rows; the frame has one row per sampled cell, so
# displaying it whole would flood the output.
pres_reps.head(10)

Unnamed: 0,Y
0,0
1,1
2,0
3,0
4,0
5,1
6,0
7,0
8,0
9,0


## Get environmental Covariates

In [15]:
from traversals import strategies as st

# Fetch the environmental covariates for the sampled cells. Timed because it is
# slow: the recorded run took 3min 22s of wall time against under 10s of CPU.
%time data = st.getEnvironmentalCovariatesFromListOfCells(cells)

CPU times: user 9.34 s, sys: 196 ms, total: 9.53 s
Wall time: 3min 22s


### Concatenate two dataframes 
> TODO: build some utility functions to automate these tasks.

In [16]:
# Place the response column Y side by side with the covariates (column-wise concat).
# NOTE(review): concat(axis=1) aligns rows on the index, so this assumes `data`
# carries the same 0..n-1 index, in the same cell order, as `pres_reps` —
# confirm; mismatched index labels would be padded with NaN instead of failing.
training_data = pd.concat([pres_reps,data],axis=1)

## Let's save this for the moment

In [18]:
# Persist the training table as CSV. With pandas defaults the index is written
# too, so the file starts with an unnamed index column.
# NOTE(review): the absolute path is hard-coded and presumably refers to a
# directory mounted in the author's environment — make it configurable if reused.
training_data.to_csv("/outputs/reptiles_train.csv")