In [3]:
%matplotlib inline

# Loading Tree Taxonomies from the Knowledge Base

## Let's first load modules and models to start inferring.

In [4]:
from drivers.neo4j_reader import TreeNeo
from mesh.models import MexMesh
from gbif.taxonomy import Occurrence, Taxonomy, GriddedTaxonomy
from drivers.neo4j_reader import Cell , extractOccurrencesFromTaxonomies
from py2neo import Graph
from django.contrib.gis.geos import GEOSGeometry

* Instantiate the graph with default parameters.

In [5]:
# Open the py2neo graph handle; with no arguments the library's default connection settings are used.
g = Graph()

### We define an area in a WKT format. 

In [6]:
# WKT ring describing the study area; the ring closes on its first vertex (-109 27).
# NOTE(review): coordinates look like lon/lat degrees — confirm the SRID the models expect.
polystr = "POLYGON((-109 27,-106 27,-106 30,-109 30,-109 27))"
# Parse the WKT text into a GEOS geometry that the spatial lookups below can take.
polygon = GEOSGeometry(polystr)

### Subselect the grid to match the region



In [7]:
# Keep only the mesh cells whose `cell` geometry intersects the study polygon.
mexgrid = MexMesh.objects.filter(cell__intersects=polygon)

### Instantiate the biosphere


In [8]:
# Queryset over every occurrence record.
biosphere = Occurrence.objects.all()
# Restrict it to the occurrences whose `geom` intersects the study polygon.
subbiosphere = biosphere.filter(geom__intersects=polygon)

## Instantiate the gridded Taxonomy


In [9]:
## This takes some time because it pulls all the cell polygons from the Postgres database.
## `mexgrid` is already restricted to `polygon` (which is built from `polystr`), so it is
## passed as-is: filtering it again by `polystr` only repeated the same ST_Intersects
## predicate in the generated SQL without changing the selected cells.
ggg = GriddedTaxonomy(
    subbiosphere,
    mexgrid,
    generate_tree_now=False,
    use_id_as_name=False,
)

Exception django.contrib.gis.gdal.error.GDALException: GDALException('Invalid pointer returned from "GDALClose"',) in DEBUG (0.050) SELECT "grid4km-mex"."gid", "grid4km-mex"."geom" FROM "grid4km-mex" WHERE (ST_Intersects("grid4km-mex"."geom", ST_GeomFromEWKB('\x0103000020e610000001000000050000000000000000405bc00000000000003b400000000000805ac00000000000003b400000000000805ac00000000000003e400000000000405bc00000000000003e400000000000405bc00000000000003b40'::bytea)) AND ST_Intersects("grid4km-mex"."geom", ST_GeomFromEWKB('\x0103000020e610000001000000050000000000000000405bc00000000000003b400000000000805ac00000000000003b400000000000805ac00000000000003e400000000000405bc00000000000003e400000000000405bc00000000000003b40'::bytea))); args=(<django.contrib.gis.db.backends.postgis.adapter.PostGISAdapter object at 0x7f5f20a6eb50>, <django.contrib.gis.db.backends.postgis.adapter.PostGISAdapter object at 0x7f5f4e327ad0>)
DEBUG:django.db.backends:(0.050) SELECT "grid4km-mex"."gid", "grid4km-mex"."geom"

# Load a tree from the gridded taxonomy

* Suppose we want to generate the tree from, say, 150 taxonomies.
Then we do this:


In [12]:
# Take the first 150 gridded taxonomies by position.
taxonomies = ggg.taxonomies[0:150]
## Yes, this is a plain positional slice; we'll see how to select a random sample later.


* Now, we need the occurrences to instantiate a NeoTree object. We can do this with:

In [13]:
# Gather the occurrences of the selected taxonomies; they are the input for the tree built later.
occurrences = extractOccurrencesFromTaxonomies(taxonomies)

DEBUG (0.002) SELECT "gbif_occurrence_csv"."id_gbif", "gbif_occurrence_csv"."dataset_id", "gbif_occurrence_csv"."institution_code", "gbif_occurrence_csv"."collection_code", "gbif_occurrence_csv"."catalog_number", "gbif_occurrence_csv"."basis_of_record", "gbif_occurrence_csv"."scientific_name", "gbif_occurrence_csv"."kingdom", "gbif_occurrence_csv"."phylum", "gbif_occurrence_csv"."_class", "gbif_occurrence_csv"."_order", "gbif_occurrence_csv"."family", "gbif_occurrence_csv"."genus", "gbif_occurrence_csv"."specific_epithet", "gbif_occurrence_csv"."kingdom_id", "gbif_occurrence_csv"."phylum_id", "gbif_occurrence_csv"."class_id", "gbif_occurrence_csv"."order_id", "gbif_occurrence_csv"."family_id", "gbif_occurrence_csv"."genus_id", "gbif_occurrence_csv"."species_id", "gbif_occurrence_csv"."country_code", "gbif_occurrence_csv"."latitude", "gbif_occurrence_csv"."longitude", "gbif_occurrence_csv"."year", "gbif_occurrence_csv"."month", "gbif_occurrence_csv"."day", "gbif_occurrence_csv"."event_d

In [14]:
## Let's see how many occurrences there are:
len(occurrences)

1183

In [15]:
### OK, now let's finally bring the data to life: build the taxonomic tree from the occurrences.
tree = TreeNeo(occurrences)

# Exploring the tree

In [16]:
# List the kingdom-level nodes of the tree to see how many kingdoms it has.
tree.kingdoms


[<TreeNode | Kingdom: Animalia - n.count : 733- >,
 <TreeNode | Kingdom: Plantae - n.count : 450- >]

In [17]:
# To explore the tree manually, walk down it with `to_<Name>` attributes
# (here: kingdom Animalia, then phylum Chordata).
tree.to_Animalia.to_Chordata

<TreeNode | Phylum: Chordata - n.count : 587- >

### The `n.count` value tells you how many occurrences of this type the tree has.
In this case it has 587 vertebrates.

### Let's pull some information relating the environment of these vertebrates.


In [18]:
# Keep a handle on the Chordata node; the following cells query it.
vertebrates = tree.to_Animalia.to_Chordata

In [19]:
# IPython help lookup (trailing `?`): shows the documentation of pullbackRasterNodes.
vertebrates.pullbackRasterNodes?


In [23]:
# Pull the 'MeanTemperature' raster information back onto the occurrences under this node.
data = vertebrates.pullbackRasterNodes('MeanTemperature')

The information in `data` is a list of tuples (raster node, occurrence). The same information can also be reached as attributes within the `associatedData` field.

In [24]:
# Read the 'MeanTemperature' values at the occurrence points; the result exposes a `.table` used below.
tempsverts = vertebrates.associatedData.getValuesFromPoints('MeanTemperature')

In [25]:
# The raster point nodes are now also available as an attribute named after the layer.
vertebrates.associatedData.points_MeanTemperature

<drivers.neo4j_reader.RasterPointNodesList at 0x7f5f02ac6b10>

It automatically adds this new 'layer' to the attributes.

In [26]:
# Show the extracted values as a table: one column per month plus `registered_value` and `date`.
tempsverts.table

Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December,registered_value,date
0,6.0,7.7,10.7,14.700000,19.100000,23.500000,23.700001,22.299999,19.900000,15.400000,10.0,6.7,23.700001,NaT
1,5.9,7.7,10.7,14.700000,19.100000,23.500000,23.700001,22.299999,19.900000,15.300000,10.0,6.7,23.700001,NaT
2,3.1,4.1,6.3,9.900000,13.400000,17.700001,18.400000,17.500000,15.900000,12.100000,7.3,4.2,17.500000,NaT
3,9.6,11.0,13.6,17.000000,21.200001,25.900000,25.799999,24.600000,23.299999,19.100000,13.8,10.3,17.000000,1980-04-07
4,3.6,5.0,7.5,11.200000,14.900000,19.299999,19.799999,18.799999,17.100000,13.100000,8.0,4.7,19.299999,1978-06-08
5,4.7,6.2,8.7,12.500000,16.200001,20.400000,20.700001,19.600000,17.900000,14.100000,8.9,5.7,19.600000,1975-08-05
6,4.7,6.2,8.7,12.500000,16.200001,20.400000,20.700001,19.600000,17.900000,14.100000,8.9,5.7,19.600000,1975-08-05
7,3.5,4.8,7.0,10.800000,14.400000,18.799999,19.299999,18.400000,16.700001,12.800000,7.8,4.6,18.400000,NaT
8,3.5,4.8,7.0,10.800000,14.400000,18.799999,19.299999,18.400000,16.700001,12.800000,7.8,4.6,18.400000,NaT
9,4.7,6.2,8.7,12.500000,16.200001,20.400000,20.700001,19.600000,17.900000,14.100000,8.9,5.7,19.600000,1975-08-05


In [28]:
# Order the table chronologically. `DataFrame.sort` is deprecated (and removed in later
# pandas releases); `sort_values` is the supported equivalent and is what the rest of this
# notebook uses. Rows without a date (NaT) are placed last by default.
tempsverts.table.sort_values(by='date')

  if __name__ == '__main__':


Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December,registered_value,date
300,7.6,9.5,12.7,16.600000,20.799999,25.100000,25.100000,23.799999,21.799999,17.299999,11.7,8.2,25.100000,1970-06-29
301,7.6,9.6,12.8,16.700001,20.900000,25.200001,25.200001,23.900000,21.799999,17.400000,11.7,8.2,25.200001,1970-06-29
306,5.8,7.6,10.7,14.700000,19.100000,23.500000,23.700001,22.400000,19.900000,15.300000,9.9,6.6,23.700001,1971-07-12
307,5.8,7.6,10.7,14.700000,19.100000,23.500000,23.700001,22.400000,19.900000,15.300000,9.9,6.6,23.700001,1971-07-12
297,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
298,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
299,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
305,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
296,3.6,5.0,7.4,11.200000,14.800000,19.200001,19.799999,18.700001,17.000000,13.100000,8.0,4.6,17.000000,1971-09-11
337,3.5,4.9,7.3,11.000000,14.600000,19.100000,19.600000,18.600000,16.900000,13.000000,8.0,4.6,19.100000,1971-09-12


In [29]:
# Observation-date column of the table, selected by label (it may contain NaT entries).
dates = tempsverts.table['date']

In [30]:
import pandas as pd

In [31]:
# Daily timestamps from 1980-01-01 through 2016-09-09, both ends included
# ("fechas" is Spanish for "dates").
# NOTE(review): the table also holds dates from the 1970s, which fall outside this range — confirm intent.
fechas = pd.date_range(
    start='1980/01/01',
    end='2016/09/09',
    freq='D',
)

In [32]:
# Display the index to check its bounds, frequency and length.
fechas

DatetimeIndex(['1980-01-01', '1980-01-02', '1980-01-03', '1980-01-04',
               '1980-01-05', '1980-01-06', '1980-01-07', '1980-01-08',
               '1980-01-09', '1980-01-10',
               ...
               '2016-08-31', '2016-09-01', '2016-09-02', '2016-09-03',
               '2016-09-04', '2016-09-05', '2016-09-06', '2016-09-07',
               '2016-09-08', '2016-09-09'],
              dtype='datetime64[ns]', length=13402, freq='D')

In [34]:
# Rows ordered by the `registered_value` column, ascending (the pandas default).
# NOTE(review): row order has no effect on the column-wise mean/std computed from `vals`.
vals = tempsverts.table.sort_values('registered_value')

In [36]:
# Column-wise mean of the numeric columns only. `numeric_only=True` makes the exclusion of
# the `date` column explicit instead of relying on pandas silently dropping non-numeric
# columns, which newer pandas releases no longer do.
vals.mean(numeric_only=True)

January              5.452048
February             7.013140
March                9.662116
April               13.395222
May                 17.324232
June                21.674403
July                22.024403
August              20.899317
September           18.996758
October             14.851024
November             9.659044
December             6.346928
registered_value    15.242321
dtype: float64

In [37]:
# Column-wise standard deviation of the numeric columns only. `numeric_only=True` makes the
# exclusion of the `date` column explicit instead of relying on pandas silently dropping
# non-numeric columns, which newer pandas releases no longer do.
vals.std(numeric_only=True)

January             2.014071
February            2.409039
March               2.730392
April               2.769976
May                 3.090127
June                3.079537
July                2.784377
August              2.615372
September           2.432924
October             2.222413
November            1.948419
December            1.854097
registered_value    5.692786
dtype: float64