In [47]:
%matplotlib inline

# Loading Tree Taxonomies from the Knowledge Base

## Let's first load modules and models to start inferring.

In [1]:
from drivers.neo4j_reader import TreeNeo
from mesh.models import MexMesh
from gbif.taxonomy import Occurrence, Taxonomy, GriddedTaxonomy
from drivers.neo4j_reader import Cell , extractOccurrencesFromTaxonomies
from py2neo import Graph
from django.contrib.gis.geos import GEOSGeometry

* Instantiate the graph with default parameters.

In [2]:
g = Graph()

### We define an area in a WKT format. 

In [3]:
polystr = "POLYGON((-109 27,-106 27,-106 30,-109 30,-109 27))"
polygon = GEOSGeometry(polystr)

### Subselect the grid to match the region



In [4]:
mexgrid = MexMesh.objects.filter(cell__intersects=polygon)

### Instantiate the biosphere


In [5]:
biosphere = Occurrence.objects.all()
## Filter by polygon.
subbiosphere = biosphere.filter(geom__intersects=polygon)

## Instantiate the gridded Taxonomy


In [6]:
## It will take some time because it's actually pulling all the polygons from the cells in the postgres database
%time ggg = GriddedTaxonomy(subbiosphere,mexgrid.filter(cell__intersects=polystr),generate_tree_now=False,use_id_as_name=False)

CPU times: user 11.5 s, sys: 204 ms, total: 11.7 s
Wall time: 11.7 s


Exception django.contrib.gis.gdal.error.GDALException: GDALException('Invalid pointer returned from "GDALClose"',) in 

# Load a tree from the gridded taxonomy

In [7]:
import biospatial.settings as sets

* Let's assume that we want to generate the Tree from ... 150 taxonomies. 
Then we do this...


In [8]:
taxonomies = ggg.taxonomies[0:150]
## yes, yes I'm using the index , we'll se how to select a random sample later.


* Now, we need the occurrences to instantiate a NeoTree object. We can do this with:

In [9]:
%time occurrences = extractOccurrencesFromTaxonomies(taxonomies)

CPU times: user 3.19 s, sys: 60 ms, total: 3.25 s
Wall time: 3.98 s


In [10]:
## Let's see how many occurrences are there:
len(occurrences)

1183

In [11]:
### oK, NOW FINALLY LETS bring the data to life.
%time tree = TreeNeo(occurrences)

CPU times: user 5.37 s, sys: 136 ms, total: 5.51 s
Wall time: 6.43 s


# Exploring the tree

In [12]:
# Compare with normal calculation/
%time taxs = ggg.taxonomies[0:150]

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 17.9 µs


In [13]:
%time lts = map(lambda t : t.generateTREE(),taxs)

CPU times: user 5.5 s, sys: 136 ms, total: 5.64 s
Wall time: 8.81 s


In [18]:
cells = tree.getExactCells()

In [19]:
cell_neighbours = map(lambda c: c.getNeighbours(),cells)

In [20]:
cc = cell_neighbours[0]

In [21]:
cc

[<Cell id=182269>, <Cell id=181689>, <Cell id=181102>, <Cell id=181687>]

In [22]:
nc = cc[0]

In [23]:
ocs = nc.occurrencesHere()

In [24]:
ocs

[<TreeNode type: Occurrence id = None name: Campostoma ornatum>,
 <TreeNode type: Occurrence id = None name: Gila robusta>,
 <TreeNode type: Occurrence id = None name: Cyprinella formosa>,
 <TreeNode type: Occurrence id = None name: Platynus ovatulus>,
 <TreeNode type: Occurrence id = None name: Platynus ovatulus>,
 <TreeNode type: Occurrence id = None name: Chlaenius tomentosus>,
 <TreeNode type: Occurrence id = None name: Harpalus caliginosus>,
 <TreeNode type: Occurrence id = None name: Catostomus leopoldi>,
 <TreeNode type: Occurrence id = None name: Catostomus bernardini>,
 <TreeNode type: Occurrence id = None name: Platynus ovatulus>]

In [16]:
trees = tree.getNeighboringTrees()

AttributeError: 'list' object has no attribute 'occurrencesHere'

In [15]:
# summon trees
c = cells[0]

In [87]:
n=ls[0]

In [88]:
n=n.start_node()

In [89]:
n.labels

<bound method Node.labels of (d01adba:Occurrence {event_date:"1983-05-07T00:00:00+00:00",geom:"SRID=4326;POINT (-108.633 29.889)",latitude:29.889,level:999,levelname:"Occurrence",longitude:-108.633,month:5,name:"Chlaenius leucoscelis",pk:667662,scientific_name:"Chlaenius leucoscelis Chevrolat",species_id:4989004,year:1983})>

In [16]:
#Perhaps we want to explore manually, use the prefix to_
tree.to_Animalia.to_Chordata.to_Aves.to_Falconiformes

<TreeNode | Order: Falconiformes - n.count : 4- >

### The n.count: value gives you information of how many occurrences of this type the tree has.
In this case it has 587 vertebrates.

### Let's pull some information relating the environment of these vertebrates.


In [17]:
vertebrates = tree.to_Animalia.to_Chordata

In [19]:
vertebrates.pullbackRasterNodes?


In [20]:
data = vertebrates.pullbackRasterNodes??


In [None]:
data = vertebrates.pullbackRasterNodes

the information in data is a list composed of a duple (raster node, occurrence) but also this information can also be reached in the form of attributes within the 'associatedData' field.

In [24]:
tempsverts = vertebrates.associatedData.getValuesFromPoints('MeanTemperature')

In [25]:
vertebrates.associatedData.points_MeanTemperature

<drivers.neo4j_reader.RasterPointNodesList at 0x7f5f02ac6b10>

It automatically adds these new 'layer' to the attributes

In [26]:
tempsverts.table

Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December,registered_value,date
0,6.0,7.7,10.7,14.700000,19.100000,23.500000,23.700001,22.299999,19.900000,15.400000,10.0,6.7,23.700001,NaT
1,5.9,7.7,10.7,14.700000,19.100000,23.500000,23.700001,22.299999,19.900000,15.300000,10.0,6.7,23.700001,NaT
2,3.1,4.1,6.3,9.900000,13.400000,17.700001,18.400000,17.500000,15.900000,12.100000,7.3,4.2,17.500000,NaT
3,9.6,11.0,13.6,17.000000,21.200001,25.900000,25.799999,24.600000,23.299999,19.100000,13.8,10.3,17.000000,1980-04-07
4,3.6,5.0,7.5,11.200000,14.900000,19.299999,19.799999,18.799999,17.100000,13.100000,8.0,4.7,19.299999,1978-06-08
5,4.7,6.2,8.7,12.500000,16.200001,20.400000,20.700001,19.600000,17.900000,14.100000,8.9,5.7,19.600000,1975-08-05
6,4.7,6.2,8.7,12.500000,16.200001,20.400000,20.700001,19.600000,17.900000,14.100000,8.9,5.7,19.600000,1975-08-05
7,3.5,4.8,7.0,10.800000,14.400000,18.799999,19.299999,18.400000,16.700001,12.800000,7.8,4.6,18.400000,NaT
8,3.5,4.8,7.0,10.800000,14.400000,18.799999,19.299999,18.400000,16.700001,12.800000,7.8,4.6,18.400000,NaT
9,4.7,6.2,8.7,12.500000,16.200001,20.400000,20.700001,19.600000,17.900000,14.100000,8.9,5.7,19.600000,1975-08-05


In [28]:
tempsverts.table.sort('date')

  if __name__ == '__main__':


Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December,registered_value,date
300,7.6,9.5,12.7,16.600000,20.799999,25.100000,25.100000,23.799999,21.799999,17.299999,11.7,8.2,25.100000,1970-06-29
301,7.6,9.6,12.8,16.700001,20.900000,25.200001,25.200001,23.900000,21.799999,17.400000,11.7,8.2,25.200001,1970-06-29
306,5.8,7.6,10.7,14.700000,19.100000,23.500000,23.700001,22.400000,19.900000,15.300000,9.9,6.6,23.700001,1971-07-12
307,5.8,7.6,10.7,14.700000,19.100000,23.500000,23.700001,22.400000,19.900000,15.300000,9.9,6.6,23.700001,1971-07-12
297,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
298,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
299,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
305,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
296,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
337,3.5,4.9,7.3,11.000000,14.600000,19.100000,19.600000,18.600000,16.900000,13.000000,8.0,4.6,19.100000,1971-09-12


In [29]:
dates = tempsverts.table.date

In [30]:
import pandas as pd

In [31]:
fechas = pd.date_range(start='1980/01/01', end='2016/09/09', freq='D')

In [32]:
fechas

DatetimeIndex(['1980-01-01', '1980-01-02', '1980-01-03', '1980-01-04',
               '1980-01-05', '1980-01-06', '1980-01-07', '1980-01-08',
               '1980-01-09', '1980-01-10',
               ...
               '2016-08-31', '2016-09-01', '2016-09-02', '2016-09-03',
               '2016-09-04', '2016-09-05', '2016-09-06', '2016-09-07',
               '2016-09-08', '2016-09-09'],
              dtype='datetime64[ns]', length=13402, freq='D')

In [34]:
vals = tempsverts.table.sort_values(by='registered_value')

In [36]:
vals.mean()

January              5.452048
February             7.013140
March                9.662116
April               13.395222
May                 17.324232
June                21.674403
July                22.024403
August              20.899317
September           18.996758
October             14.851024
November             9.659044
December             6.346928
registered_value    15.242321
dtype: float64

In [37]:
vals.std()

January             2.014071
February            2.409039
March               2.730392
April               2.769976
May                 3.090127
June                3.079537
July                2.784377
August              2.615372
September           2.432924
October             2.222413
November            1.948419
December            1.854097
registered_value    5.692786
dtype: float64

In [23]:
vertebrates.setOccurrences()

[<Occurrence pk=688258>,
 <Occurrence pk=688266>,
 <Occurrence pk=1029809>,
 <Occurrence pk=2406040>,
 <Occurrence pk=955965>,
 <Occurrence pk=1771914>,
 <Occurrence pk=2695732>,
 <Occurrence pk=3054539>,
 <Occurrence pk=1023258>,
 <Occurrence pk=2690400>,
 <Occurrence pk=1768833>,
 <Occurrence pk=1770810>,
 <Occurrence pk=1769991>,
 <Occurrence pk=1023251>,
 <Occurrence pk=3054189>,
 <Occurrence pk=1765395>,
 <Occurrence pk=1767201>,
 <Occurrence pk=2697193>,
 <Occurrence pk=1172068>,
 <Occurrence pk=1766429>,
 <Occurrence pk=2701298>,
 <Occurrence pk=3035140>,
 <Occurrence pk=2903877>,
 <Occurrence pk=1334896>,
 <Occurrence pk=436322>,
 <Occurrence pk=436365>,
 <Occurrence pk=436369>,
 <Occurrence pk=950397>,
 <Occurrence pk=2894439>,
 <Occurrence pk=2890506>,
 <Occurrence pk=1329084>,
 <Occurrence pk=1338649>,
 <Occurrence pk=745384>,
 <Occurrence pk=741992>,
 <Occurrence pk=2406045>,
 <Occurrence pk=743000>,
 <Occurrence pk=433307>,
 <Occurrence pk=817928>,
 <Occurrence pk=817934>,

In [25]:
cells=vertebrates.getExactCells()


In [26]:
c = cells[0
         ]

In [27]:
c.getNeighbours()

[<Cell id=180519>, <Cell id=181107>, <Cell id=180521>, <Cell id=179926>]

In [30]:
vertebrates.associatedData.getAssociatedRasterAreaData('MeanTemperature')

DEBUG (0.278) SELECT ST_Clip(ST_Union("bioclim"."tavg"."rast"),ST_GeomFromText('MULTIPOLYGON (((-108.4070634159985 30.01788980100015, -108.3580634159985 30.01788980100015, -108.3580634159985 29.96888980100015, -108.4070634159985 29.96888980100015, -108.4070634159985 30.01788980100015)), ((-106.5940634159982 29.96888980100015, -106.5450634159982 29.96888980100015, -106.5450634159982 29.91988980100015, -106.5940634159982 29.91988980100015, -106.5940634159982 29.96888980100015)), ((-109.0440634159986 30.01788980100015, -108.9950634159986 30.01788980100015, -108.9950634159986 29.96888980100015, -109.0440634159986 29.96888980100015, -109.0440634159986 30.01788980100015)), ((-106.5450634159982 29.96888980100015, -106.4960634159982 29.96888980100015, -106.4960634159982 29.91988980100015, -106.5450634159982 29.91988980100015, -106.5450634159982 29.96888980100015)), ((-106.4470634159982 30.01788980100015, -106.3980634159982 30.01788980100015, -106.3980634159982 29.96888980100015, -106.447063415

<raster_api.tools.RasterData at 0x7f26d70fb410>