# How many data out there? 
We will retrieve all the cells at lower resolution. Check if they have data (children) and then convert it to raster ("not take much time on this).

## Later 
In this exercise I'll perform a spatial autocorrelation model using the tools available in spystats and the data stored in *Biospytial*
In particular estimate a richness model.


In [24]:
%matplotlib inline
import sys
sys.path.append('/apps')
import django
django.setup()
from drivers.tree_builder import TreeNeo
from drivers.graph_models import TreeNode, Order, Family, graph,Kingdom,Occurrence
from drivers.graph_models import Cell,Mex4km, countObjectsOf
import matplotlib.pyplot as plt
## Use the ggplot style
plt.style.use('ggplot')

## Take a subsample of the Cells.

In [25]:
n = graph.data("MATCH (n:mex4km) RETURN Count(n)").pop()['Count(n)']

In [26]:
n = countObjectsOf(Mex4km)
print(n)

279277


### A lot of data we need to use a sampling method.

In [27]:
import numpy as np
np.random.seed(12345)
sample_size = 10000
choices = np.random.choice(range(1,n),sample_size,replace='False')

### Query for exporting a selection of cells.

In [28]:
c = list(choices)
## This will stringify the id list to get the selected cells.
sel = Mex4km.select(graph).where("_.id IN  %s "%str(c))

In [29]:
%time samples = list(sel)

CPU times: user 1min 23s, sys: 4.16 s, total: 1min 28s
Wall time: 7min 27s


In [32]:
%time kingdoms = map(lambda c : list(c.has_kingdoms),samples)

CPU times: user 13 s, sys: 980 ms, total: 14 s
Wall time: 43.5 s


In [34]:
richking = map(lambda k : len(k) , kingdoms)

In [36]:
richking.sort()

In [7]:
c = samples[6]

In [8]:
list(c.Occurrences)

[<Occurrence pk=None>,
 <Occurrence pk=2604072>,
 <Occurrence pk=1573611>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=1956022>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=589504>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>]

In [9]:
list(c.has_kingdoms)

[<TreeNode type: Kingdom id = 6 name: Plantae>]

In [10]:
list(c.has_classes)

[<TreeNode type: Class id = 220 name: Magnoliopsida>]

In [11]:
list(c.has_families)

[<TreeNode type: Family id = 2505 name: Ericaceae>,
 <TreeNode type: Family id = 3065 name: Asteraceae>,
 <TreeNode type: Family id = 5386 name: Fabaceae>]

In [12]:
list(c.has_genera)

[<TreeNode type: Genus id = 2882796 name: Arbutus>,
 <TreeNode type: Genus id = 3094959 name: Gochnatia>,
 <TreeNode type: Genus id = 2956904 name: Senna>]

In [13]:
list(c.has_species)

[<TreeNode type: Specie id = 2882799 name: Arbutus xalapensis Kunth>,
 <TreeNode type: Specie id = 2882799 name: Arbutus xalapensis subsp. texana (Buckley) A.E.Murray>,
 <TreeNode type: Specie id = 2882799 name: Arbutus texana Buckley>,
 <TreeNode type: Specie id = 3095015 name: Gochnatia hypoleuca subsp. hypoleuca>,
 <TreeNode type: Specie id = 3095015 name: Gochnatia hypoleuca (DC.) A. Gray>,
 <TreeNode type: Specie id = 2957665 name: Senna demissa var. radicans (H.S.Irwin & Barneby) H.S.Irwin & Barneby>,
 <TreeNode type: Specie id = 2957665 name: Senna demissa (Rose) H.S.Irwin & Barneby>,
 <TreeNode type: Specie id = 2957665 name: Senna demissa var. demissa>,
 <TreeNode type: Specie id = 2957665 name: Cassia demissa Rose>,
 <TreeNode type: Specie id = 2957665 name: Cassia demissa var. radicans H.S.Irwin & Barneby>]

In [14]:
list(c.has_orders)

[<TreeNode type: Order id = 1353 name: Ericales>,
 <TreeNode type: Order id = 414 name: Asterales>,
 <TreeNode type: Order id = 1370 name: Fabales>]

In [15]:
list(c.has_phyla)

[<TreeNode type: Phylum id = 49 name: Magnoliophyta>]

In [16]:
list(c.has_occurrences)

[<Occurrence pk=1956022>,
 <Occurrence pk=1573611>,
 <Occurrence pk=589504>,
 <Occurrence pk=2604072>]

In [17]:
list(c._getAssociatedNodesPerTaxonLevel(Occurrence))

[<Occurrence pk=1956022>,
 <Occurrence pk=1573611>,
 <Occurrence pk=589504>,
 <Occurrence pk=2604072>]

In [None]:
c.has_phyla

In [20]:
%time list(c.has_occurrences)

CPU times: user 12 ms, sys: 0 ns, total: 12 ms
Wall time: 12.5 ms


[<Occurrence pk=1956022>,
 <Occurrence pk=1573611>,
 <Occurrence pk=589504>,
 <Occurrence pk=2604072>]

In [21]:
%time list(c.Occurrences)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 48.2 µs


[<Occurrence pk=None>,
 <Occurrence pk=2604072>,
 <Occurrence pk=1573611>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=1956022>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=589504>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>,
 <Occurrence pk=None>]

## Look for taxonimic types in each cell
Not necessary to instantiate the entire tree

In [22]:
import drivers.tree_builder as tb
%time trees = map(lambda cell : tb.buildTreeNeo(cell),samples)

CPU times: user 5.1 s, sys: 128 ms, total: 5.23 s
Wall time: 5.68 s


In [23]:
c = samples[:10]

In [None]:
trees

In [None]:
f = trees[3]

In [None]:
c = samples[2] 

In [None]:
rr = list(c.Families)

In [None]:
f11 = rr[1]

In [None]:
f11.occurrencesHere

In [None]:
list(c.Occurrences)

In [None]:
c[0]

In [None]:
cc = c[0]

In [None]:
ff = iter(cc.Families)

In [None]:
ff.next

In [None]:
list(cc.Occurrences)

In [None]:
f.children_link

In [None]:
list(c.contained_in)

In [None]:
list(c.Families)

In [None]:
from drivers.graph_models import graph

In [None]:
%time available_rels = map(lambda cell : list(graph.match(end_node=cell.__ogm__.node,rel_type='IS_IN')),samples)

In [None]:
something = available_rels[26]

## List available node types per cell

In [None]:
def _try_levelnames_extraction(relationship):
    """
    Extracts the end node relationship name.
    for use with map functions.
    """
    try:
        a = relationship.start_node()['levelname']
        return a
    except:
        return None
    
types = map(lambda r : map(lambda t : _try_levelnames_extraction(t),r),available_rels)

In [None]:
types

In [None]:
tt = tb.buildTreeNeo(samples[26])

In [None]:
#For now not run
#big_tree = reduce(lambda a,b : a+b , trees)
import seaborn as sns

In [None]:
t = trees[2]

In [None]:
ll = map(lambda t : t.richness , trees)

In [None]:
sns.distplot(ll)

In [None]:
tl.plotTree(tt)

In [None]:
import traversals.strategies as strg

In [None]:
type(root)

In [None]:
root = t.node

In [None]:
a = strg.getPresencesForNode(root,trees)

In [None]:
data_t = strg.getPresencesForListOfNodes([root],trees)

In [None]:
data_t

# The model

In [None]:
import pymc3 as pm

In [None]:
-

In [None]:
from pymc3 import find_MAP
map_estimate = find_MAP(model=model)
map_estimate

In [None]:
import pandas as pd

In [None]:
mapxy = pd.concat([data_t[['Longitude','Latitude']],pd.DataFrame({'map': map_estimate['latent_field']})],axis=1)

In [None]:
gmapxy = tools.toGeoDataFrame(mapxy,xcoord_name='Longitude',ycoord_name='Latitude')

In [None]:
fig, ax = plt.subplots(figsize=(14, 9));
gmapxy.plot(ax=ax,column='map')

## Prediction
The conditional method creates the conditional, or predictive, distribution over the latent function at arbitrary x∗x∗ input points, f(x∗)f(x∗). To construct the conditional distribution we write:

In [None]:
minx = min(data_t.Longitude)
maxx = max(data_t.Longitude)
miny = min(data_t.Latitude)
maxy = max(data_t.Latitude)

In [None]:
from external_plugins.spystats.spystats import tools

In [None]:
grid = tools.createGrid(grid_sizex=10,grid_sizey=10,minx=minx,miny=miny,maxx=maxx,maxy=maxy)

In [None]:
gp.predict(grid[['Lon','Lat']])

In [None]:
%time f_star = gp.conditional("f_star", X=grid[['Lon','Lat']])

In [None]:
getdata = lambda tree : tree.associatedData.getEnvironmentalVariablesCells()

In [None]:
ts[1].associatedData.getEnvironmentalVariablesCells()

In [None]:
list(choices)

In [None]:
n