From f4659109940230f9f5db15ad2e36b2494b2f9322 Mon Sep 17 00:00:00 2001 From: Scott Chamberlain Date: Tue, 8 Dec 2015 09:25:42 +0100 Subject: [PATCH] binomen post --- _drafts/2015-12-08-binomen-taxonomy-tools.Rmd | 186 ++ _drafts/2015-12-08-binomen-taxonomy-tools.md | 352 +++ _posts/2015-12-08-binomen-taxonomy-tools.md | 352 +++ .../index.html | 18 +- .../2010/12/ngram-ecological-terms/index.html | 20 +- .../index.html | 18 +- .../01/bipartite-networks-and-r/index.html | 18 +- .../index.html | 18 +- .../index.html | 20 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 20 +- .../index.html | 20 +- .../r-and-google-visualization-api/index.html | 20 +- _site/2011/01/r-bloggers/index.html | 18 +- .../2011/02/farmer-s-markets-data/index.html | 20 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 20 +- .../r-overtakes-sas-in-popularity/index.html | 18 +- .../troubling-news-for-teaching-of/index.html | 18 +- .../index.html | 18 +- .../basic-ggplot2-network-graphs/index.html | 18 +- _site/2011/03/bio-oracle/index.html | 18 +- .../03/check-out-phyloseminar-org/index.html | 18 +- _site/2011/03/cloudnumbers-com/index.html | 18 +- .../index.html | 18 +- _site/2011/{02 => 03}/rstudio/index.html | 20 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../04/phylometa-from-r-udpate/index.html | 18 +- .../04/processing-nested-lists/index.html | 20 +- .../2011/04/rstudio-beta-2-is-out/index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- _site/2011/05/google-reader/index.html | 18 +- .../index.html | 18 +- .../plyrs-idataframe-vs-dataframe/index.html | 18 +- .../index.html | 20 +- .../simple-function-for-plotting/index.html | 18 +- .../2011/05/treebase-trees-from-r/index.html | 18 +- .../06/10000-visits-to-my-recology/index.html | 18 +- .../index.html | 18 +- .../2011/06/how-to-fit-power-laws/index.html | 18 +- .../2011/06/ievobio-2011-synopsis/index.html | 20 +- .../index.html | 18 +- .../index.html | 20 +- .../plos-journals-api-from-r-rplos/index.html | 18 +- .../index.html | 18 +- .../index.html | 20 +- .../index.html | 20 +- .../07/bloggingtweeting-from-esa11/index.html | 18 +- .../2011/07/crdata-vs-cloudnumbers/index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- _site/2011/08/monday-at-esa11/index.html | 20 +- .../index.html | 18 +- .../index.html | 18 +- _site/2011/08/thursday-at-esa11/index.html | 18 +- _site/2011/08/wednesday-at-esa11/index.html | 18 +- .../09/data-visualization-book/index.html | 20 +- .../index.html | 20 +- _site/2011/09/figshare-talk/index.html | 20 +- .../index.html | 20 +- .../my-take-on-r-introduction-talk/index.html | 18 +- .../index.html | 20 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- _site/2011/10/new-food-web-dataset/index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- _site/2011/12/i-work-for-internet/index.html | 18 +- .../londonr-meetings-presentations/index.html | 18 +- _site/2011/12/recology-is-1-yr-old/index.html | 20 +- .../index.html | 18 +- _site/2012/01/RNetLogo/index.html | 18 +- .../flora-north-america-scraping/index.html | 18 +- .../index.html | 18 +- .../index.html | 18 +- _site/2012/01/phylogeny-resolution/index.html | 18 +- .../reviewing-peer-review-process/index.html | 18 +- _site/2012/01/testing-twitterfeed/index.html | 18 +- 
_site/2012/02/math-ecology-survey/index.html | 18 +- .../index.html | 18 +- _site/2012/03/mvabund/index.html | 18 +- _site/2012/05/opencpu-github-stats/index.html | 18 +- .../2012/06/recent-r-eeb-packages/index.html | 18 +- .../2012/07/global-names-resolver/index.html | 18 +- _site/2012/08/ecology-unconference/index.html | 18 +- .../08/get-ecoevo-journal-titles/index.html | 18 +- _site/2012/08/ggplot-inset-map/index.html | 18 +- _site/2012/08/making-matrices/index.html | 18 +- _site/2012/09/getting-data/index.html | 18 +- _site/2012/09/gov-dat/index.html | 18 +- _site/2012/09/rmetadata/index.html | 18 +- _site/2012/09/rvertnet/index.html | 18 +- _site/2012/10/R2G2-package/index.html | 18 +- _site/2012/10/get-taxa-downstream/index.html | 18 +- .../10/phylogenetic-tree-balance/index.html | 18 +- _site/2012/10/rgbif-newfxns/index.html | 18 +- _site/2012/11/altecology/index.html | 18 +- _site/2012/12/is-invasive/index.html | 18 +- _site/2012/12/shiny-r/index.html | 18 +- _site/2012/12/taxize/index.html | 18 +- _site/2013/01/api-token/index.html | 18 +- _site/2013/01/ifttt-test/index.html | 18 +- .../2013/01/open-science-challenge/index.html | 18 +- _site/2013/01/tnrs-use-case/index.html | 18 +- _site/2013/02/academia-reboot/index.html | 18 +- _site/2013/02/common-tree/index.html | 18 +- _site/2013/03/r-metadata/index.html | 18 +- .../2013/03/ropensci-collaboration/index.html | 18 +- _site/2013/05/rbison/index.html | 18 +- _site/2013/06/coffeehouse/index.html | 18 +- _site/2013/06/couch/index.html | 18 +- _site/2013/06/fylopic/index.html | 18 +- _site/2013/06/geojson/index.html | 18 +- _site/2013/06/sofa/index.html | 18 +- _site/2013/07/beyond-academia/index.html | 18 +- _site/2013/07/code/index.html | 18 +- _site/2013/07/r-ecology-workshop/index.html | 18 +- _site/2013/07/r-resources/index.html | 18 +- _site/2013/08/govdat-vignette/index.html | 18 +- _site/2013/08/phenology/index.html | 18 +- _site/2013/08/sciordata/index.html | 18 +- _site/2013/09/natenemies/index.html | 18 +- .../2013/09/taxonomy-in-three-acts/index.html | 18 +- _site/2013/10/codeinpapers/index.html | 18 +- _site/2013/10/shutdown/index.html | 18 +- _site/2013/11/jekyll-intro/index.html | 18 +- _site/2014/01/cites/index.html | 18 +- _site/2014/01/rgauges-hourly/index.html | 18 +- _site/2014/02/cowsay/index.html | 18 +- _site/2014/05/analogsea/index.html | 18 +- _site/2014/05/logplotreboot/index.html | 18 +- _site/2014/06/analogsea-v01/index.html | 18 +- _site/2014/08/rsunlight/index.html | 18 +- .../conditionality-meta-analysis/index.html | 18 +- _site/2014/11/ckanr-intro/index.html | 18 +- _site/2014/11/github-fun/index.html | 18 +- _site/2014/11/sofa/index.html | 18 +- _site/2014/11/taxize-1000/index.html | 18 +- _site/2014/12/altmetrics-anywhere/index.html | 18 +- _site/2014/12/http-codes/index.html | 18 +- _site/2014/12/icanhaz-altmetrics/index.html | 18 +- _site/2014/12/multi-handle/index.html | 18 +- _site/2014/12/museum-aamsf/index.html | 18 +- _site/2014/12/pytaxize-itis/index.html | 18 +- _site/2014/12/rplos-pubs-country/index.html | 18 +- _site/2014/12/taxize-workflows/index.html | 18 +- _site/2015/01/binomen/index.html | 18 +- _site/2015/01/discourse-in-r/index.html | 18 +- _site/2015/01/elasticsearch/index.html | 18 +- _site/2015/01/geojson-topojson-io/index.html | 18 +- _site/2015/01/gistr-github-gists/index.html | 18 +- _site/2015/01/httping/index.html | 18 +- .../elasticsearch-backup-restore/index.html | 20 +- _site/2015/02/package-dev/index.html | 18 +- _site/2015/02/secure-elasticsearch/index.html | 18 +- 
_site/2015/03/couch-dataframes/index.html | 18 +- _site/2015/03/csl-client/index.html | 18 +- _site/2015/03/faster-solr/index.html | 18 +- _site/2015/04/geojson-io/index.html | 18 +- _site/2015/04/the-new-way/index.html | 18 +- _site/2015/05/mow-the-lawn/index.html | 18 +- _site/2015/05/openadds/index.html | 18 +- _site/2015/06/idigbio-in-spocc/index.html | 18 +- _site/2015/06/rerddap/index.html | 18 +- .../07/weather-data-with-rnoaa/index.html | 18 +- _site/2015/08/full-text/index.html | 18 +- _site/2015/09/oai-client/index.html | 18 +- _site/2015/10/analogsea-cran/index.html | 18 +- _site/2015/10/noaa-isd/index.html | 18 +- _site/2015/10/open-source-metrics/index.html | 18 +- _site/2015/11/crossref-clients/index.html | 18 +- _site/2015/11/pygbif/index.html | 18 +- .../2015/12/binomen-taxonomy-tools/index.html | 420 +++ _site/archives/index.html | 419 +-- _site/atom.xml | 641 +++-- _site/feed.R.xml | 445 ++-- _site/feed.xml | 404 ++- _site/index.html | 444 ++-- _site/page10/index.html | 724 +++--- _site/page11/index.html | 816 +++--- _site/page12/index.html | 1420 +++++----- _site/page13/index.html | 1475 +++++++---- _site/page14/index.html | 876 ++++--- _site/page15/index.html | 955 ++++--- _site/page16/index.html | 727 ++++-- _site/page17/index.html | 231 +- _site/page18/index.html | 597 ++--- _site/page19/index.html | 250 +- _site/page2/index.html | 610 +++-- _site/page20/index.html | 380 +-- _site/page21/index.html | 824 ++++-- _site/page22/index.html | 998 ++++--- _site/page23/index.html | 271 +- _site/page24/index.html | 313 +-- _site/page25/index.html | 638 ++--- _site/page26/index.html | 822 ++++-- _site/page27/index.html | 740 ++++-- _site/page28/index.html | 654 +++-- _site/page29/index.html | 508 ++-- _site/page3/index.html | 2314 +++++++++-------- _site/page30/index.html | 657 +++-- _site/page31/index.html | 124 +- _site/page32/index.html | 92 +- _site/page33/index.html | 190 +- _site/page34/index.html | 113 +- _site/page35/index.html | 116 +- _site/page36/index.html | 39 +- _site/page37/index.html | 24 +- _site/page38/index.html | 24 +- _site/page39/index.html | 59 +- _site/page4/index.html | 1337 +++++----- _site/page40/index.html | 24 +- _site/page41/index.html | 35 +- _site/page42/index.html | 24 +- _site/page43/index.html | 30 +- _site/page44/index.html | 42 +- _site/page45/index.html | 24 +- _site/page46/index.html | 24 +- _site/page47/index.html | 24 +- _site/page48/index.html | 22 +- _site/page49/index.html | 181 +- _site/page5/index.html | 1602 ++++++++---- _site/page50/index.html | 258 +- _site/page51/index.html | 46 +- _site/page52/index.html | 24 +- _site/page53/index.html | 166 +- _site/page54/index.html | 193 +- _site/page55/index.html | 24 +- _site/page56/index.html | 24 +- _site/page57/index.html | 32 +- _site/page58/index.html | 48 +- _site/page59/index.html | 32 +- _site/page6/index.html | 907 ++++--- _site/page60/index.html | 45 +- _site/page61/index.html | 49 +- _site/page62/index.html | 125 + _site/page7/index.html | 369 +-- _site/page8/index.html | 2205 +++++++++------- _site/page9/index.html | 1749 +++++++------ _site/po | 0 _site/tags/API/index.html | 10 +- _site/tags/DataWithoutBorders/index.html | 10 +- _site/tags/Datasets/index.html | 8 + _site/tags/EC2/index.html | 8 + _site/tags/Ecology/index.html | 8 + _site/tags/Evolution/index.html | 8 + _site/tags/GBIF/index.html | 8 + _site/tags/GoogleVis/index.html | 14 +- _site/tags/ITIS/index.html | 10 +- _site/tags/KML/index.html | 8 + _site/tags/LondonR/index.html | 8 + _site/tags/Meetings/index.html | 8 
+ _site/tags/Methods/index.html | 12 +- _site/tags/NOAA/index.html | 8 + _site/tags/NetLogo/index.html | 8 + _site/tags/Networks/index.html | 8 + _site/tags/OAI-PMH/index.html | 8 + _site/tags/Papers/index.html | 8 + _site/tags/Phylogenetics/index.html | 8 + _site/tags/Picante/index.html | 8 + _site/tags/Policy/index.html | 8 + _site/tags/R/index.html | 33 +- _site/tags/RStudio/index.html | 8 + _site/tags/Statistics/index.html | 8 + _site/tags/USGS/index.html | 8 + _site/tags/abundance/index.html | 8 + _site/tags/academia/index.html | 8 + _site/tags/altmetrics/index.html | 8 + _site/tags/ape/index.html | 8 + _site/tags/ascii/index.html | 8 + _site/tags/begging/index.html | 8 + _site/tags/biodiversity/index.html | 8 + _site/tags/bipartite/index.html | 8 + _site/tags/blogger/index.html | 8 + _site/tags/car/index.html | 8 + _site/tags/citizen science/index.html | 8 + _site/tags/climate change/index.html | 8 + _site/tags/climate/index.html | 8 + _site/tags/cloud/index.html | 8 + _site/tags/cloudcomputing/index.html | 8 + _site/tags/code/index.html | 8 + _site/tags/codedev/index.html | 8 + _site/tags/community/index.html | 8 + _site/tags/conference/index.html | 8 + _site/tags/couchdb/index.html | 8 + _site/tags/crossref/index.html | 8 + _site/tags/crowdfunding/index.html | 8 + _site/tags/data management/index.html | 8 + _site/tags/data/index.html | 8 + _site/tags/database/index.html | 8 + _site/tags/datavisualization/index.html | 8 + _site/tags/development/index.html | 8 + _site/tags/digitize/index.html | 8 + _site/tags/doi/index.html | 8 + _site/tags/elasticsearch/index.html | 10 +- _site/tags/figshare/index.html | 8 + _site/tags/geiger/index.html | 8 + _site/tags/geojson/index.html | 8 + _site/tags/geospatial/index.html | 8 + _site/tags/ggplot2/index.html | 14 +- _site/tags/github/index.html | 8 + _site/tags/google earth/index.html | 8 + _site/tags/government/index.html | 8 + _site/tags/gridExtra/index.html | 8 + _site/tags/help/index.html | 8 + _site/tags/history/index.html | 8 + _site/tags/http/index.html | 8 + _site/tags/httr/index.html | 8 + _site/tags/igraph/index.html | 8 + _site/tags/index.html | 981 ++++++- _site/tags/inset/index.html | 8 + _site/tags/interactivegraphics/index.html | 8 + _site/tags/invasive/index.html | 8 + _site/tags/javascript/index.html | 8 + _site/tags/jekyll/index.html | 8 + _site/tags/jobs/index.html | 8 + _site/tags/journals/index.html | 8 + _site/tags/json/index.html | 8 + _site/tags/lattice/index.html | 8 + _site/tags/literature/index.html | 8 + _site/tags/lubridate/index.html | 8 + _site/tags/macroecology/index.html | 8 + _site/tags/map/index.html | 8 + _site/tags/mapping/index.html | 8 + _site/tags/maps/index.html | 8 + _site/tags/math/index.html | 8 + _site/tags/matrix/index.html | 8 + _site/tags/meta-analysis/index.html | 8 + _site/tags/metadata/index.html | 8 + _site/tags/motmot/index.html | 8 + _site/tags/news/index.html | 8 + _site/tags/nosql/index.html | 8 + _site/tags/nytimes/index.html | 8 + _site/tags/occurrence/index.html | 8 + _site/tags/online/index.html | 8 + _site/tags/open access/index.html | 8 + _site/tags/open source/index.html | 8 + _site/tags/openaccess/index.html | 20 +- _site/tags/opencpu.org/index.html | 8 + _site/tags/opendata/index.html | 8 + _site/tags/openstreetmaps/index.html | 8 + _site/tags/packages/index.html | 8 + _site/tags/peerreview/index.html | 8 + _site/tags/phangorn/index.html | 8 + _site/tags/phenology/index.html | 8 + _site/tags/phylocom/index.html | 8 + _site/tags/phylogenetic/index.html | 8 + 
_site/tags/phytools/index.html | 8 + _site/tags/plyr/index.html | 10 +- _site/tags/publications/index.html | 8 + _site/tags/publishing/index.html | 8 + _site/tags/python/index.html | 8 + _site/tags/regex/index.html | 8 + _site/tags/reshape2/index.html | 10 +- _site/tags/resolve/index.html | 8 + _site/tags/ritis/index.html | 8 + _site/tags/rmendeley/index.html | 8 + _site/tags/ropensci/index.html | 8 + _site/tags/rplos/index.html | 8 + _site/tags/ruby/index.html | 8 + _site/tags/science/index.html | 8 + _site/tags/sciencetalks/index.html | 18 +- _site/tags/scraping/index.html | 8 + _site/tags/security/index.html | 8 + _site/tags/segue/index.html | 8 + _site/tags/servers/index.html | 8 + _site/tags/shiny/index.html | 8 + _site/tags/simulation/index.html | 8 + _site/tags/sofa/index.html | 8 + _site/tags/solr/index.html | 8 + _site/tags/spatial/index.html | 8 + _site/tags/split-apply-combine/index.html | 1731 ++++++++++++ _site/tags/stringr/index.html | 8 + _site/tags/sunlightlabs/index.html | 8 + _site/tags/taxize/index.html | 10 +- _site/tags/taxonomic/index.html | 8 + _site/tags/taxonomy/index.html | 15 + _site/tags/test/index.html | 8 + _site/tags/text-mining/index.html | 8 + _site/tags/thoughts/index.html | 8 + _site/tags/topojson/index.html | 8 + _site/tags/transparency/index.html | 8 + _site/tags/tree shape/index.html | 8 + _site/tags/treebase/index.html | 8 + _site/tags/tutorials/index.html | 8 + _site/tags/twitteR/index.html | 8 + _site/tags/unconference/index.html | 8 + _site/tags/unfinished/index.html | 8 + _site/tags/vime/index.html | 10 +- _site/tags/vimeo/index.html | 10 +- _site/tags/visualization/index.html | 8 + _site/tags/vizualization/index.html | 8 + _site/tags/weather/index.html | 8 + _site/tags/web services/index.html | 8 + _site/tags/weecology/index.html | 8 + _site/tags/wordpress/index.html | 8 + 410 files changed, 24650 insertions(+), 14629 deletions(-) create mode 100644 _drafts/2015-12-08-binomen-taxonomy-tools.Rmd create mode 100644 _drafts/2015-12-08-binomen-taxonomy-tools.md create mode 100644 _posts/2015-12-08-binomen-taxonomy-tools.md rename _site/2011/{02 => 03}/rstudio/index.html (96%) create mode 100644 _site/2015/12/binomen-taxonomy-tools/index.html create mode 100644 _site/page62/index.html create mode 100644 _site/po create mode 100644 _site/tags/split-apply-combine/index.html diff --git a/_drafts/2015-12-08-binomen-taxonomy-tools.Rmd b/_drafts/2015-12-08-binomen-taxonomy-tools.Rmd new file mode 100644 index 0000000000..eefbee1f26 --- /dev/null +++ b/_drafts/2015-12-08-binomen-taxonomy-tools.Rmd @@ -0,0 +1,186 @@ +--- +name: binomen-taxonomy-tools +layout: post +title: binomen - Tools for slicing and dicing taxonomic names +date: 2015-12-08 +author: Scott Chamberlain +sourceslug: _drafts/2015-12-08-binomen-taxonomy-tools.Rmd +tags: +- R +- taxonomy +- split-apply-combine +--- + +```{r echo=FALSE} +knitr::opts_chunk$set( + comment = "#>", + collapse = TRUE, + warning = FALSE, + message = FALSE +) +``` + +The first version of `binomen` is now up on [CRAN][binomencran]. It provides various taxonomic classes for defining a single taxon, multiple taxa, and a taxonomic data.frame. It is designed as a companion to [taxize](https://github.com/ropensci/taxize), where you can get taxonomic data on taxonomic names from the web. 
+
+The classes (S3):
+
+* `taxon`
+* `taxonref`
+* `taxonrefs`
+* `binomial`
+* `grouping` (i.e., classification; a different term is used to avoid a conflict with `classification()` in `taxize`)
+
+For example, the `binomial` class is defined by a genus, epithet, authority, and optional full species name and canonical version.
+
+```r
+binomial("Poa", "annua", authority="L.")
+```
+
+```r
+
+  genus: Poa
+  epithet: annua
+  canonical:
+  species:
+  authority: L.
+```
+
+The package has a suite of functions to work on these taxonomic classes:
+
+* `gethier()` - get hierarchy from a `taxon` class
+* `scatter()` - make each row in a taxonomic data.frame (`taxondf`) a separate `taxon` object within a single `taxa` object
+* `assemble()` - make a `taxa` object into a `taxondf` data.frame
+* `pick()` - pick out one or more taxonomic groups
+* `pop()` - pop out (drop) one or more taxonomic groups
+* `span()` - pick a range between two taxonomic groups (inclusive)
+* `strain()` - filter by taxonomic groups, like dplyr's filter
+* `name()` - get the taxon name for each `taxonref` object
+* `uri()` - get the reference uri for each `taxonref` object
+* `rank()` - get the taxonomic rank for each `taxonref` object
+* `id()` - get the identifier for each `taxonref` object
+
+The approach in this package is sort of like `split-apply-combine` from `plyr`/`dplyr`, but aimed at making those operations easy with taxonomic names.
+
+## Install
+
+For the examples below, install the package from CRAN:
+
+```{r eval=FALSE}
+install.packages("binomen")
+```
+
+```{r}
+library("binomen")
+```
+
+## Make a taxon
+
+Make a taxon object
+
+```{r}
+(obj <- make_taxon(genus="Poa", epithet="annua", authority="L.",
+  family='Poaceae', clazz='Poales', kingdom='Plantae', variety='annua'))
+```
+
+Index into various parts of the object
+
+The binomial
+
+```{r}
+obj$binomial
+```
+
+The authority
+
+```{r}
+obj$binomial$authority
+```
+
+The classification
+
+```{r}
+obj$grouping
+```
+
+The family
+
+```{r}
+obj$grouping$family
+```
+
+## Subset taxon objects
+
+Get one or more ranks via `pick()`
+
+```{r}
+obj %>% pick(family)
+obj %>% pick(family, genus)
+```
+
+Drop one or more ranks via `pop()`
+
+```{r}
+obj %>% pop(family)
+obj %>% pop(family, genus)
+```
+
+Get a range of ranks via `span()`
+
+```{r}
+obj %>% span(kingdom, family)
+```
+
+Extract classification as a `data.frame`
+
+```{r}
+gethier(obj)
+```
+
+## Taxonomic data.frames
+
+Make one
+
+```{r}
+df <- data.frame(order = c('Asterales','Asterales','Fagales','Poales','Poales','Poales'),
+  family = c('Asteraceae','Asteraceae','Fagaceae','Poaceae','Poaceae','Poaceae'),
+  genus = c('Helianthus','Helianthus','Quercus','Poa','Festuca','Holodiscus'),
+  stringsAsFactors = FALSE)
+(df2 <- taxon_df(df))
+```
+
+Parse: get the rank `order` via `pick()`
+
+```{r}
+df2 %>% pick(order)
+```
+
+Get the ranks `order`, `family`, and `genus` via `pick()`
+
+```{r}
+df2 %>% pick(order, family, genus)
+```
+
+Get a range of ranks via `span()`, from rank `X` to rank `Y`
+
+```{r}
+df2 %>% span(family, genus)
+```
+
+Separate each row into a `taxon` class (many `taxon` objects together make up a `taxa` class)
+
+```{r output.lines=1:20}
+scatter(df2)
+```
+
+And you can re-assemble a data.frame from the output of `scatter()` with `assemble()`
+
+```{r}
+out <- scatter(df2)
+assemble(out)
+```
+
+## Thoughts?
+
+I'm really curious what people think of `binomen`. I'm not sure how useful this will be in the wild. Try it. Let me know.
Thanks much :) + +[binomencran]: https://cran.rstudio.com/web/packages/binomen diff --git a/_drafts/2015-12-08-binomen-taxonomy-tools.md b/_drafts/2015-12-08-binomen-taxonomy-tools.md new file mode 100644 index 0000000000..f02adfcd76 --- /dev/null +++ b/_drafts/2015-12-08-binomen-taxonomy-tools.md @@ -0,0 +1,352 @@ +--- +name: binomen-taxonomy-tools +layout: post +title: binomen - Tools for slicing and dicing taxonomic names +date: 2015-12-08 +author: Scott Chamberlain +sourceslug: _drafts/2015-12-08-binomen-taxonomy-tools.Rmd +tags: +- R +- taxonomy +- split-apply-combine +--- + + + +The first version of `binomen` is now up on [CRAN][binomencran]. It provides various taxonomic classes for defining a single taxon, multiple taxa, and a taxonomic data.frame. It is designed as a companion to [taxize](https://github.com/ropensci/taxize), where you can get taxonomic data on taxonomic names from the web. + +The classes (S3): + +* `taxon` +* `taxonref` +* `taxonrefs` +* `binomial` +* `grouping` (i.e., classification - used different term to avoid conflict with classification in `taxize`) + +For example, the `binomial` class is defined by a genus, epithet, authority, and optional full species name and canonical version. + +```r +binomial("Poa", "annua", authority="L.") +``` + +```r + + genus: Poa + epithet: annua + canonical: + species: + authority: L. +``` + +The package has a suite of functions to work on these taxonomic classes: + +* `gethier()` - get hierarchy from a `taxon` class +* `scatter()` - make each row in taxonomic data.frame (`taxondf`) a separate `taxon` object within a single `taxa` object +* `assemble()` - make a `taxa` object into a `taxondf` data.frame +* `pick()` - pick out one or more taxonomic groups +* `pop()` - pop out (drop) one or more taxonomic groups +* `span()` - pick a range between two taxonomic groups (inclusive) +* `strain()` - filter by taxonomic groups, like dplyr's filter +* `name()` - get the taxon name for each `taxonref` object +* `uri()` - get the reference uri for each `taxonref` object +* `rank()` - get the taxonomic rank for each `taxonref` object +* `id()` - get the reference uri for each `taxonref` object + +The approach in this package I suppose is sort of like `split-apply-combine` from `plyr`/`dplyr`, whereas this is aims to make it easy to do with taxonomic names. + +## Install + +For examples below, you'll need the development version: + + +```r +install.packages("binomen") +``` + + +```r +library("binomen") +``` + +## Make a taxon + +Make a taxon object + + +```r +(obj <- make_taxon(genus="Poa", epithet="annua", authority="L.", + family='Poaceae', clazz='Poales', kingdom='Plantae', variety='annua')) +#> +#> binomial: Poa annua +#> grouping: +#> kingdom: Plantae +#> clazz: Poales +#> family: Poaceae +#> genus: Poa +#> species: Poa annua +#> variety: annua +``` + +Index to various parts of the object + +The binomial + + +```r +obj$binomial +#> +#> genus: Poa +#> epithet: annua +#> canonical: Poa annua +#> species: Poa annua L. +#> authority: L. +``` + +The authority + + +```r +obj$binomial$authority +#> [1] "L." 
+``` + +The classification + + +```r +obj$grouping +#> +#> kingdom: Plantae +#> clazz: Poales +#> family: Poaceae +#> genus: Poa +#> species: Poa annua +#> variety: annua +``` + +The family + + +```r +obj$grouping$family +#> +#> rank: family +#> name: Poaceae +#> id: none +#> uri: none +``` + +## Subset taxon objects + +Get one or more ranks via `pick()` + + +```r +obj %>% pick(family) +#> +#> binomial: Poa annua +#> grouping: +#> family: Poaceae +obj %>% pick(family, genus) +#> +#> binomial: Poa annua +#> grouping: +#> family: Poaceae +#> genus: Poa +``` + +Drop one or more ranks via `pop()` + + +```r +obj %>% pop(family) +#> +#> binomial: Poa annua +#> grouping: +#> kingdom: Plantae +#> clazz: Poales +#> genus: Poa +#> species: Poa annua +#> variety: annua +obj %>% pop(family, genus) +#> +#> binomial: Poa annua +#> grouping: +#> kingdom: Plantae +#> clazz: Poales +#> species: Poa annua +#> variety: annua +``` + +Get a range of ranks via `span()` + + +```r +obj %>% span(kingdom, family) +#> +#> binomial: Poa annua +#> grouping: +#> kingdom: Plantae +#> clazz: Poales +#> family: Poaceae +``` + +Extract classification as a `data.frame` + + +```r +gethier(obj) +#> rank name +#> 1 kingdom Plantae +#> 2 clazz Poales +#> 3 family Poaceae +#> 4 genus Poa +#> 5 species Poa annua +#> 6 variety annua +``` + +## Taxonomic data.frame's + +Make one + + +```r +df <- data.frame(order = c('Asterales','Asterales','Fagales','Poales','Poales','Poales'), + family = c('Asteraceae','Asteraceae','Fagaceae','Poaceae','Poaceae','Poaceae'), + genus = c('Helianthus','Helianthus','Quercus','Poa','Festuca','Holodiscus'), + stringsAsFactors = FALSE) +(df2 <- taxon_df(df)) +#> order family genus +#> 1 Asterales Asteraceae Helianthus +#> 2 Asterales Asteraceae Helianthus +#> 3 Fagales Fagaceae Quercus +#> 4 Poales Poaceae Poa +#> 5 Poales Poaceae Festuca +#> 6 Poales Poaceae Holodiscus +``` + +Parse - get rank order via `pick()` + + +```r +df2 %>% pick(order) +#> order +#> 1 Asterales +#> 2 Asterales +#> 3 Fagales +#> 4 Poales +#> 5 Poales +#> 6 Poales +``` + +get ranks order, family, and genus via `pick()` + + +```r +df2 %>% pick(order, family, genus) +#> order family genus +#> 1 Asterales Asteraceae Helianthus +#> 2 Asterales Asteraceae Helianthus +#> 3 Fagales Fagaceae Quercus +#> 4 Poales Poaceae Poa +#> 5 Poales Poaceae Festuca +#> 6 Poales Poaceae Holodiscus +``` + +get range of names via `span()`, from rank `X` to rank `Y` + + +```r +df2 %>% span(family, genus) +#> family genus +#> 1 Asteraceae Helianthus +#> 2 Asteraceae Helianthus +#> 3 Fagaceae Quercus +#> 4 Poaceae Poa +#> 5 Poaceae Festuca +#> 6 Poaceae Holodiscus +``` + +Separate each row into a `taxon` class (many `taxon` objects are a `taxa` class) + + +```r +scatter(df2) +#> [[1]] +#> +#> binomial: Helianthus none +#> grouping: +#> order: Asterales +#> family: Asteraceae +#> genus: Helianthus +#> species: Helianthus none +#> +#> [[2]] +#> +#> binomial: Helianthus none +#> grouping: +#> order: Asterales +#> family: Asteraceae +#> genus: Helianthus +#> species: Helianthus none +#> +#> [[3]] +#> +#> binomial: Quercus none +#> grouping: +#> order: Fagales +#> family: Fagaceae +#> genus: Quercus +#> species: Quercus none +#> +#> [[4]] +#> +#> binomial: Poa none +#> grouping: +#> order: Poales +#> family: Poaceae +#> genus: Poa +#> species: Poa none +#> +#> [[5]] +#> +#> binomial: Festuca none +#> grouping: +#> order: Poales +#> family: Poaceae +#> genus: Festuca +#> species: Festuca none +#> +#> [[6]] +#> +#> binomial: Holodiscus none +#> grouping: +#> order: 
Poales +#> family: Poaceae +#> genus: Holodiscus +#> species: Holodiscus none +#> +#> attr(,"class") +#> [1] "taxa" +``` + +And you can re-assemble a data.frame from the output of `scatter()` with `assemble()` + + +```r +out <- scatter(df2) +assemble(out) +#> order family genus species +#> 1 Asterales Asteraceae Helianthus Helianthus none +#> 2 Asterales Asteraceae Helianthus Helianthus none +#> 3 Fagales Fagaceae Quercus Quercus none +#> 4 Poales Poaceae Poa Poa none +#> 5 Poales Poaceae Festuca Festuca none +#> 6 Poales Poaceae Holodiscus Holodiscus none +``` + +## Thoughts? + +I'm really curious what people think of `binomen`. I'm not sure how useful this will be in the wild. Try it. Let me know. Thanks much :) + +[binomencran]: https://cran.rstudio.com/web/packages/binomen diff --git a/_posts/2015-12-08-binomen-taxonomy-tools.md b/_posts/2015-12-08-binomen-taxonomy-tools.md new file mode 100644 index 0000000000..f02adfcd76 --- /dev/null +++ b/_posts/2015-12-08-binomen-taxonomy-tools.md @@ -0,0 +1,352 @@ +--- +name: binomen-taxonomy-tools +layout: post +title: binomen - Tools for slicing and dicing taxonomic names +date: 2015-12-08 +author: Scott Chamberlain +sourceslug: _drafts/2015-12-08-binomen-taxonomy-tools.Rmd +tags: +- R +- taxonomy +- split-apply-combine +--- + + + +The first version of `binomen` is now up on [CRAN][binomencran]. It provides various taxonomic classes for defining a single taxon, multiple taxa, and a taxonomic data.frame. It is designed as a companion to [taxize](https://github.com/ropensci/taxize), where you can get taxonomic data on taxonomic names from the web. + +The classes (S3): + +* `taxon` +* `taxonref` +* `taxonrefs` +* `binomial` +* `grouping` (i.e., classification - used different term to avoid conflict with classification in `taxize`) + +For example, the `binomial` class is defined by a genus, epithet, authority, and optional full species name and canonical version. + +```r +binomial("Poa", "annua", authority="L.") +``` + +```r + + genus: Poa + epithet: annua + canonical: + species: + authority: L. +``` + +The package has a suite of functions to work on these taxonomic classes: + +* `gethier()` - get hierarchy from a `taxon` class +* `scatter()` - make each row in taxonomic data.frame (`taxondf`) a separate `taxon` object within a single `taxa` object +* `assemble()` - make a `taxa` object into a `taxondf` data.frame +* `pick()` - pick out one or more taxonomic groups +* `pop()` - pop out (drop) one or more taxonomic groups +* `span()` - pick a range between two taxonomic groups (inclusive) +* `strain()` - filter by taxonomic groups, like dplyr's filter +* `name()` - get the taxon name for each `taxonref` object +* `uri()` - get the reference uri for each `taxonref` object +* `rank()` - get the taxonomic rank for each `taxonref` object +* `id()` - get the reference uri for each `taxonref` object + +The approach in this package I suppose is sort of like `split-apply-combine` from `plyr`/`dplyr`, whereas this is aims to make it easy to do with taxonomic names. 
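+
+The filtering and accessor functions above (`strain()`, `name()`, `rank()`, `uri()`, `id()`) aren't demonstrated in the examples below. Here is a rough, not-run sketch of how they might be called, assuming `strain()` takes dplyr-style filter expressions and the accessors each take a single `taxonref` (an assumption based on the descriptions above, not verified against the package); `obj` and `df2` are the objects created in the examples that follow.
+
+```r
+library("binomen")
+
+# assumed usage, not run: dplyr-like filtering of a taxonomic data.frame
+df2 %>% strain(order == "Poales")
+
+# assumed usage, not run: pull fields from a single taxonref
+fam <- obj$grouping$family
+name(fam)  # taxon name, e.g. "Poaceae"
+rank(fam)  # taxonomic rank, e.g. "family"
+uri(fam)   # reference uri, if any
+id(fam)    # identifier, if any
+```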
+ +## Install + +For examples below, you'll need the development version: + + +```r +install.packages("binomen") +``` + + +```r +library("binomen") +``` + +## Make a taxon + +Make a taxon object + + +```r +(obj <- make_taxon(genus="Poa", epithet="annua", authority="L.", + family='Poaceae', clazz='Poales', kingdom='Plantae', variety='annua')) +#> +#> binomial: Poa annua +#> grouping: +#> kingdom: Plantae +#> clazz: Poales +#> family: Poaceae +#> genus: Poa +#> species: Poa annua +#> variety: annua +``` + +Index to various parts of the object + +The binomial + + +```r +obj$binomial +#> +#> genus: Poa +#> epithet: annua +#> canonical: Poa annua +#> species: Poa annua L. +#> authority: L. +``` + +The authority + + +```r +obj$binomial$authority +#> [1] "L." +``` + +The classification + + +```r +obj$grouping +#> +#> kingdom: Plantae +#> clazz: Poales +#> family: Poaceae +#> genus: Poa +#> species: Poa annua +#> variety: annua +``` + +The family + + +```r +obj$grouping$family +#> +#> rank: family +#> name: Poaceae +#> id: none +#> uri: none +``` + +## Subset taxon objects + +Get one or more ranks via `pick()` + + +```r +obj %>% pick(family) +#> +#> binomial: Poa annua +#> grouping: +#> family: Poaceae +obj %>% pick(family, genus) +#> +#> binomial: Poa annua +#> grouping: +#> family: Poaceae +#> genus: Poa +``` + +Drop one or more ranks via `pop()` + + +```r +obj %>% pop(family) +#> +#> binomial: Poa annua +#> grouping: +#> kingdom: Plantae +#> clazz: Poales +#> genus: Poa +#> species: Poa annua +#> variety: annua +obj %>% pop(family, genus) +#> +#> binomial: Poa annua +#> grouping: +#> kingdom: Plantae +#> clazz: Poales +#> species: Poa annua +#> variety: annua +``` + +Get a range of ranks via `span()` + + +```r +obj %>% span(kingdom, family) +#> +#> binomial: Poa annua +#> grouping: +#> kingdom: Plantae +#> clazz: Poales +#> family: Poaceae +``` + +Extract classification as a `data.frame` + + +```r +gethier(obj) +#> rank name +#> 1 kingdom Plantae +#> 2 clazz Poales +#> 3 family Poaceae +#> 4 genus Poa +#> 5 species Poa annua +#> 6 variety annua +``` + +## Taxonomic data.frame's + +Make one + + +```r +df <- data.frame(order = c('Asterales','Asterales','Fagales','Poales','Poales','Poales'), + family = c('Asteraceae','Asteraceae','Fagaceae','Poaceae','Poaceae','Poaceae'), + genus = c('Helianthus','Helianthus','Quercus','Poa','Festuca','Holodiscus'), + stringsAsFactors = FALSE) +(df2 <- taxon_df(df)) +#> order family genus +#> 1 Asterales Asteraceae Helianthus +#> 2 Asterales Asteraceae Helianthus +#> 3 Fagales Fagaceae Quercus +#> 4 Poales Poaceae Poa +#> 5 Poales Poaceae Festuca +#> 6 Poales Poaceae Holodiscus +``` + +Parse - get rank order via `pick()` + + +```r +df2 %>% pick(order) +#> order +#> 1 Asterales +#> 2 Asterales +#> 3 Fagales +#> 4 Poales +#> 5 Poales +#> 6 Poales +``` + +get ranks order, family, and genus via `pick()` + + +```r +df2 %>% pick(order, family, genus) +#> order family genus +#> 1 Asterales Asteraceae Helianthus +#> 2 Asterales Asteraceae Helianthus +#> 3 Fagales Fagaceae Quercus +#> 4 Poales Poaceae Poa +#> 5 Poales Poaceae Festuca +#> 6 Poales Poaceae Holodiscus +``` + +get range of names via `span()`, from rank `X` to rank `Y` + + +```r +df2 %>% span(family, genus) +#> family genus +#> 1 Asteraceae Helianthus +#> 2 Asteraceae Helianthus +#> 3 Fagaceae Quercus +#> 4 Poaceae Poa +#> 5 Poaceae Festuca +#> 6 Poaceae Holodiscus +``` + +Separate each row into a `taxon` class (many `taxon` objects are a `taxa` class) + + +```r +scatter(df2) +#> [[1]] +#> +#> binomial: 
Helianthus none +#> grouping: +#> order: Asterales +#> family: Asteraceae +#> genus: Helianthus +#> species: Helianthus none +#> +#> [[2]] +#> +#> binomial: Helianthus none +#> grouping: +#> order: Asterales +#> family: Asteraceae +#> genus: Helianthus +#> species: Helianthus none +#> +#> [[3]] +#> +#> binomial: Quercus none +#> grouping: +#> order: Fagales +#> family: Fagaceae +#> genus: Quercus +#> species: Quercus none +#> +#> [[4]] +#> +#> binomial: Poa none +#> grouping: +#> order: Poales +#> family: Poaceae +#> genus: Poa +#> species: Poa none +#> +#> [[5]] +#> +#> binomial: Festuca none +#> grouping: +#> order: Poales +#> family: Poaceae +#> genus: Festuca +#> species: Festuca none +#> +#> [[6]] +#> +#> binomial: Holodiscus none +#> grouping: +#> order: Poales +#> family: Poaceae +#> genus: Holodiscus +#> species: Holodiscus none +#> +#> attr(,"class") +#> [1] "taxa" +``` + +And you can re-assemble a data.frame from the output of `scatter()` with `assemble()` + + +```r +out <- scatter(df2) +assemble(out) +#> order family genus species +#> 1 Asterales Asteraceae Helianthus Helianthus none +#> 2 Asterales Asteraceae Helianthus Helianthus none +#> 3 Fagales Fagaceae Quercus Quercus none +#> 4 Poales Poaceae Poa Poa none +#> 5 Poales Poaceae Festuca Festuca none +#> 6 Poales Poaceae Holodiscus Holodiscus none +``` + +## Thoughts? + +I'm really curious what people think of `binomen`. I'm not sure how useful this will be in the wild. Try it. Let me know. Thanks much :) + +[binomencran]: https://cran.rstudio.com/web/packages/binomen diff --git a/_site/2010/12/a-new-blog-about-using-r-for-ecology-and-evolution/index.html b/_site/2010/12/a-new-blog-about-using-r-for-ecology-and-evolution/index.html index 475ddb880b..cd3e22af80 100644 --- a/_site/2010/12/a-new-blog-about-using-r-for-ecology-and-evolution/index.html +++ b/_site/2010/12/a-new-blog-about-using-r-for-ecology-and-evolution/index.html @@ -88,27 +88,27 @@

Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2010/12/ngram-ecological-terms/index.html b/_site/2010/12/ngram-ecological-terms/index.html index 0856866522..f6e4cc26ac 100644 --- a/_site/2010/12/ngram-ecological-terms/index.html +++ b/_site/2010/12/ngram-ecological-terms/index.html @@ -57,7 +57,7 @@

    Recology

    Ngram ecological terms

      - +  
    @@ -90,27 +90,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2010/12/phylogenetic-meta-analysis-in-r-using-phylometa/index.html b/_site/2010/12/phylogenetic-meta-analysis-in-r-using-phylometa/index.html index e04c50d343..d9394d2147 100644 --- a/_site/2010/12/phylogenetic-meta-analysis-in-r-using-phylometa/index.html +++ b/_site/2010/12/phylogenetic-meta-analysis-in-r-using-phylometa/index.html @@ -94,27 +94,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/bipartite-networks-and-r/index.html b/_site/2011/01/bipartite-networks-and-r/index.html index 6675bfb51b..4433aa31da 100644 --- a/_site/2011/01/bipartite-networks-and-r/index.html +++ b/_site/2011/01/bipartite-networks-and-r/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/ecological-networks-from-abundance/index.html b/_site/2011/01/ecological-networks-from-abundance/index.html index 166df71199..b5d16eddf2 100644 --- a/_site/2011/01/ecological-networks-from-abundance/index.html +++ b/_site/2011/01/ecological-networks-from-abundance/index.html @@ -96,27 +96,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/good-riddance-to-excel-pivot-tables/index.html b/_site/2011/01/good-riddance-to-excel-pivot-tables/index.html index 6ed8f2ec7a..d91be5a11e 100644 --- a/_site/2011/01/good-riddance-to-excel-pivot-tables/index.html +++ b/_site/2011/01/good-riddance-to-excel-pivot-tables/index.html @@ -57,7 +57,7 @@

    Recology

    Good riddance to Excel pivot tables

      - +   plyr reshape2 R
    @@ -96,27 +96,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/just-for-fun-recoverygov-data-snooping/index.html b/_site/2011/01/just-for-fun-recoverygov-data-snooping/index.html index 4b996faebf..c33607ee2c 100644 --- a/_site/2011/01/just-for-fun-recoverygov-data-snooping/index.html +++ b/_site/2011/01/just-for-fun-recoverygov-data-snooping/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/new-approach-to-analysis-of-phylogenetic-community-structure/index.html b/_site/2011/01/new-approach-to-analysis-of-phylogenetic-community-structure/index.html index 6c83aab2cf..70ff5c9329 100644 --- a/_site/2011/01/new-approach-to-analysis-of-phylogenetic-community-structure/index.html +++ b/_site/2011/01/new-approach-to-analysis-of-phylogenetic-community-structure/index.html @@ -94,27 +94,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/r-and-google-visualization-api-fish/index.html b/_site/2011/01/r-and-google-visualization-api-fish/index.html index c505054bac..aa26b6fe1a 100644 --- a/_site/2011/01/r-and-google-visualization-api-fish/index.html +++ b/_site/2011/01/r-and-google-visualization-api-fish/index.html @@ -57,7 +57,7 @@

    Recology

    R and Google Visualization API: Fish harvests

      - +   ggplot2 Ecology GoogleVis R Datasets
    @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/r-and-google-visualization-api-wikispeedia/index.html b/_site/2011/01/r-and-google-visualization-api-wikispeedia/index.html index a638983114..ce1408474e 100644 --- a/_site/2011/01/r-and-google-visualization-api-wikispeedia/index.html +++ b/_site/2011/01/r-and-google-visualization-api-wikispeedia/index.html @@ -57,7 +57,7 @@

    Recology

    R and Google Visualization API: Wikispeedia

      - +   GoogleVis R
    @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/r-and-google-visualization-api/index.html b/_site/2011/01/r-and-google-visualization-api/index.html index 12e68fc6ed..86a3fda64f 100644 --- a/_site/2011/01/r-and-google-visualization-api/index.html +++ b/_site/2011/01/r-and-google-visualization-api/index.html @@ -57,7 +57,7 @@

    Recology

    R and Google Visualization API

      - +   Ecology GoogleVis R
    @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/01/r-bloggers/index.html b/_site/2011/01/r-bloggers/index.html index 9835a6a6f6..8406361aa7 100644 --- a/_site/2011/01/r-bloggers/index.html +++ b/_site/2011/01/r-bloggers/index.html @@ -90,27 +90,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/02/farmer-s-markets-data/index.html b/_site/2011/02/farmer-s-markets-data/index.html index bfe7e9f0fb..b3f4887513 100644 --- a/_site/2011/02/farmer-s-markets-data/index.html +++ b/_site/2011/02/farmer-s-markets-data/index.html @@ -57,7 +57,7 @@

    Recology

    Farmer's markets data

      - +   ggplot2 Datasets
    @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/02/phenotypic-selection-analysis-in-r/index.html b/_site/2011/02/phenotypic-selection-analysis-in-r/index.html index 91cef5957e..17b9f4e133 100644 --- a/_site/2011/02/phenotypic-selection-analysis-in-r/index.html +++ b/_site/2011/02/phenotypic-selection-analysis-in-r/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/02/phylogenetic-analysis-with-the-phangorn-package-an-example/index.html b/_site/2011/02/phylogenetic-analysis-with-the-phangorn-package-an-example/index.html index 8b8bb0e4f6..6b1706ddbf 100644 --- a/_site/2011/02/phylogenetic-analysis-with-the-phangorn-package-an-example/index.html +++ b/_site/2011/02/phylogenetic-analysis-with-the-phangorn-package-an-example/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/02/plants-are-less-sex-deprived-when-next-to-closely-related-neighbors/index.html b/_site/2011/02/plants-are-less-sex-deprived-when-next-to-closely-related-neighbors/index.html index 667d9794a5..6fb52a7d9b 100644 --- a/_site/2011/02/plants-are-less-sex-deprived-when-next-to-closely-related-neighbors/index.html +++ b/_site/2011/02/plants-are-less-sex-deprived-when-next-to-closely-related-neighbors/index.html @@ -57,7 +57,7 @@

    Recology

    Plants are less sex deprived when next to closely related neighbors

      - +  
    @@ -96,27 +96,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/02/r-overtakes-sas-in-popularity/index.html b/_site/2011/02/r-overtakes-sas-in-popularity/index.html index 1ca49124ab..3dc2bfe059 100644 --- a/_site/2011/02/r-overtakes-sas-in-popularity/index.html +++ b/_site/2011/02/r-overtakes-sas-in-popularity/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/02/troubling-news-for-teaching-of/index.html b/_site/2011/02/troubling-news-for-teaching-of/index.html index 1d494ac52d..2f655cb3e1 100644 --- a/_site/2011/02/troubling-news-for-teaching-of/index.html +++ b/_site/2011/02/troubling-news-for-teaching-of/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/03/basic-ggplot2-network-graphs-ver2/index.html b/_site/2011/03/basic-ggplot2-network-graphs-ver2/index.html index ec8dd3ef28..903c2e373e 100644 --- a/_site/2011/03/basic-ggplot2-network-graphs-ver2/index.html +++ b/_site/2011/03/basic-ggplot2-network-graphs-ver2/index.html @@ -230,27 +230,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/03/basic-ggplot2-network-graphs/index.html b/_site/2011/03/basic-ggplot2-network-graphs/index.html index 4c7606c7e6..b3d63b7a8d 100644 --- a/_site/2011/03/basic-ggplot2-network-graphs/index.html +++ b/_site/2011/03/basic-ggplot2-network-graphs/index.html @@ -99,27 +99,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/03/bio-oracle/index.html b/_site/2011/03/bio-oracle/index.html index 749ad55689..5d17e28e4d 100644 --- a/_site/2011/03/bio-oracle/index.html +++ b/_site/2011/03/bio-oracle/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/03/check-out-phyloseminar-org/index.html b/_site/2011/03/check-out-phyloseminar-org/index.html index d3f24a9f20..a4a2fe8bae 100644 --- a/_site/2011/03/check-out-phyloseminar-org/index.html +++ b/_site/2011/03/check-out-phyloseminar-org/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/03/cloudnumbers-com/index.html b/_site/2011/03/cloudnumbers-com/index.html index dfe86df2dd..92155a0aeb 100644 --- a/_site/2011/03/cloudnumbers-com/index.html +++ b/_site/2011/03/cloudnumbers-com/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/03/for-all-your-pairwise-comparison-needs/index.html b/_site/2011/03/for-all-your-pairwise-comparison-needs/index.html index 79010cddf4..0a2f3fc333 100644 --- a/_site/2011/03/for-all-your-pairwise-comparison-needs/index.html +++ b/_site/2011/03/for-all-your-pairwise-comparison-needs/index.html @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2011/02/rstudio/index.html b/_site/2011/03/rstudio/index.html similarity index 96% rename from _site/2011/02/rstudio/index.html rename to _site/2011/03/rstudio/index.html index 45b30a433b..b4d5977c0f 100644 --- a/_site/2011/02/rstudio/index.html +++ b/_site/2011/03/rstudio/index.html @@ -57,7 +57,7 @@

    Recology

    RStudio

      - +  
    @@ -88,27 +88,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

    [The diffs for the remaining regenerated _site/**/index.html pages repeat the same "Related Posts" sidebar update shown above: binomen - Tools for slicing and dicing taxonomic names (08 Dec 2015) is added, noaa - Integrated Surface Database data (21 Oct 2015) drops off, and Crossref programmatic clients (30 Nov 2015) and pygbif - GBIF client for Python (12 Nov 2015) each shift down one slot.]

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/05/rbison/index.html b/_site/2013/05/rbison/index.html index 1b125e8608..974d3d1c09 100644 --- a/_site/2013/05/rbison/index.html +++ b/_site/2013/05/rbison/index.html @@ -221,27 +221,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/06/coffeehouse/index.html b/_site/2013/06/coffeehouse/index.html index 12c76589c3..f29645bb6c 100644 --- a/_site/2013/06/coffeehouse/index.html +++ b/_site/2013/06/coffeehouse/index.html @@ -119,27 +119,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/06/couch/index.html b/_site/2013/06/couch/index.html index 7ea533ea5b..0fa494b7c6 100644 --- a/_site/2013/06/couch/index.html +++ b/_site/2013/06/couch/index.html @@ -359,27 +359,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/06/fylopic/index.html b/_site/2013/06/fylopic/index.html index 1ada323090..c31dcd9447 100644 --- a/_site/2013/06/fylopic/index.html +++ b/_site/2013/06/fylopic/index.html @@ -212,27 +212,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/06/geojson/index.html b/_site/2013/06/geojson/index.html index 46e826c9e4..0929d5a393 100644 --- a/_site/2013/06/geojson/index.html +++ b/_site/2013/06/geojson/index.html @@ -178,27 +178,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/06/sofa/index.html b/_site/2013/06/sofa/index.html index 85edd64d56..19aab07fcd 100644 --- a/_site/2013/06/sofa/index.html +++ b/_site/2013/06/sofa/index.html @@ -242,27 +242,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/07/beyond-academia/index.html b/_site/2013/07/beyond-academia/index.html index 9ddcc0f7b8..42ee39061d 100644 --- a/_site/2013/07/beyond-academia/index.html +++ b/_site/2013/07/beyond-academia/index.html @@ -112,27 +112,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/07/code/index.html b/_site/2013/07/code/index.html index 257d857111..33eeb3beac 100644 --- a/_site/2013/07/code/index.html +++ b/_site/2013/07/code/index.html @@ -142,27 +142,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/07/r-ecology-workshop/index.html b/_site/2013/07/r-ecology-workshop/index.html index f518c88a28..62d0885c43 100644 --- a/_site/2013/07/r-ecology-workshop/index.html +++ b/_site/2013/07/r-ecology-workshop/index.html @@ -96,27 +96,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/07/r-resources/index.html b/_site/2013/07/r-resources/index.html index abfccecd7e..96b8b148d7 100644 --- a/_site/2013/07/r-resources/index.html +++ b/_site/2013/07/r-resources/index.html @@ -103,27 +103,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/08/govdat-vignette/index.html b/_site/2013/08/govdat-vignette/index.html index 3b3b8e2b67..82eeee5caf 100644 --- a/_site/2013/08/govdat-vignette/index.html +++ b/_site/2013/08/govdat-vignette/index.html @@ -394,27 +394,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/08/phenology/index.html b/_site/2013/08/phenology/index.html index 030254e55f..04e7f559da 100644 --- a/_site/2013/08/phenology/index.html +++ b/_site/2013/08/phenology/index.html @@ -104,27 +104,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/08/sciordata/index.html b/_site/2013/08/sciordata/index.html index 07bb5956d8..e6f06d8b99 100644 --- a/_site/2013/08/sciordata/index.html +++ b/_site/2013/08/sciordata/index.html @@ -229,27 +229,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/09/natenemies/index.html b/_site/2013/09/natenemies/index.html index 8de1ead1a0..9ceff131dd 100644 --- a/_site/2013/09/natenemies/index.html +++ b/_site/2013/09/natenemies/index.html @@ -94,27 +94,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/09/taxonomy-in-three-acts/index.html b/_site/2013/09/taxonomy-in-three-acts/index.html index b84f9c09c3..5cf3a7537f 100644 --- a/_site/2013/09/taxonomy-in-three-acts/index.html +++ b/_site/2013/09/taxonomy-in-three-acts/index.html @@ -169,27 +169,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/10/codeinpapers/index.html b/_site/2013/10/codeinpapers/index.html index aaf75e2c24..846bc97d0e 100644 --- a/_site/2013/10/codeinpapers/index.html +++ b/_site/2013/10/codeinpapers/index.html @@ -92,27 +92,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/10/shutdown/index.html b/_site/2013/10/shutdown/index.html index 196ce21cce..7260820303 100644 --- a/_site/2013/10/shutdown/index.html +++ b/_site/2013/10/shutdown/index.html @@ -167,27 +167,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2013/11/jekyll-intro/index.html b/_site/2013/11/jekyll-intro/index.html index 62f63e24fe..bf6b3b8ead 100644 --- a/_site/2013/11/jekyll-intro/index.html +++ b/_site/2013/11/jekyll-intro/index.html @@ -202,27 +202,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/01/cites/index.html b/_site/2014/01/cites/index.html index 6ed10b59ea..34023ca562 100644 --- a/_site/2014/01/cites/index.html +++ b/_site/2014/01/cites/index.html @@ -219,27 +219,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/01/rgauges-hourly/index.html b/_site/2014/01/rgauges-hourly/index.html index 8d5caff358..efb4e06c43 100644 --- a/_site/2014/01/rgauges-hourly/index.html +++ b/_site/2014/01/rgauges-hourly/index.html @@ -363,27 +363,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/02/cowsay/index.html b/_site/2014/02/cowsay/index.html index 20de60df3f..e88ec3db34 100644 --- a/_site/2014/02/cowsay/index.html +++ b/_site/2014/02/cowsay/index.html @@ -293,27 +293,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/05/analogsea/index.html b/_site/2014/05/analogsea/index.html index f6d3048605..12870d3539 100644 --- a/_site/2014/05/analogsea/index.html +++ b/_site/2014/05/analogsea/index.html @@ -245,27 +245,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/05/logplotreboot/index.html b/_site/2014/05/logplotreboot/index.html index 4177a6b50d..16a8c82dfa 100644 --- a/_site/2014/05/logplotreboot/index.html +++ b/_site/2014/05/logplotreboot/index.html @@ -200,27 +200,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/06/analogsea-v01/index.html b/_site/2014/06/analogsea-v01/index.html index e798dfe278..370e550d32 100644 --- a/_site/2014/06/analogsea-v01/index.html +++ b/_site/2014/06/analogsea-v01/index.html @@ -359,27 +359,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/08/rsunlight/index.html b/_site/2014/08/rsunlight/index.html index 946e43b0fe..270a47cd69 100644 --- a/_site/2014/08/rsunlight/index.html +++ b/_site/2014/08/rsunlight/index.html @@ -327,27 +327,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/10/conditionality-meta-analysis/index.html b/_site/2014/10/conditionality-meta-analysis/index.html index 2487b5e818..f9b12438a9 100644 --- a/_site/2014/10/conditionality-meta-analysis/index.html +++ b/_site/2014/10/conditionality-meta-analysis/index.html @@ -208,27 +208,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/11/ckanr-intro/index.html b/_site/2014/11/ckanr-intro/index.html index cb2e1a1254..ec88a13db8 100644 --- a/_site/2014/11/ckanr-intro/index.html +++ b/_site/2014/11/ckanr-intro/index.html @@ -444,27 +444,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/11/github-fun/index.html b/_site/2014/11/github-fun/index.html index 64b9605293..a96eb89515 100644 --- a/_site/2014/11/github-fun/index.html +++ b/_site/2014/11/github-fun/index.html @@ -223,27 +223,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/11/sofa/index.html b/_site/2014/11/sofa/index.html index ae821236a2..ad5a14cbd3 100644 --- a/_site/2014/11/sofa/index.html +++ b/_site/2014/11/sofa/index.html @@ -360,27 +360,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/11/taxize-1000/index.html b/_site/2014/11/taxize-1000/index.html index 922cf9dc0e..725faa2de3 100644 --- a/_site/2014/11/taxize-1000/index.html +++ b/_site/2014/11/taxize-1000/index.html @@ -255,27 +255,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/12/altmetrics-anywhere/index.html b/_site/2014/12/altmetrics-anywhere/index.html index 443af85059..3e490e10cb 100644 --- a/_site/2014/12/altmetrics-anywhere/index.html +++ b/_site/2014/12/altmetrics-anywhere/index.html @@ -223,27 +223,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/12/http-codes/index.html b/_site/2014/12/http-codes/index.html index 831ded29d4..85c472804e 100644 --- a/_site/2014/12/http-codes/index.html +++ b/_site/2014/12/http-codes/index.html @@ -284,27 +284,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/12/icanhaz-altmetrics/index.html b/_site/2014/12/icanhaz-altmetrics/index.html index 775c56eafe..9eeeb3389c 100644 --- a/_site/2014/12/icanhaz-altmetrics/index.html +++ b/_site/2014/12/icanhaz-altmetrics/index.html @@ -223,27 +223,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/12/multi-handle/index.html b/_site/2014/12/multi-handle/index.html index 0b309fcf7a..553436815e 100644 --- a/_site/2014/12/multi-handle/index.html +++ b/_site/2014/12/multi-handle/index.html @@ -139,27 +139,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/12/museum-aamsf/index.html b/_site/2014/12/museum-aamsf/index.html index 75cbcd5103..f994d002af 100644 --- a/_site/2014/12/museum-aamsf/index.html +++ b/_site/2014/12/museum-aamsf/index.html @@ -192,27 +192,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/12/pytaxize-itis/index.html b/_site/2014/12/pytaxize-itis/index.html index e342a44fc5..36de186072 100644 --- a/_site/2014/12/pytaxize-itis/index.html +++ b/_site/2014/12/pytaxize-itis/index.html @@ -199,27 +199,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/12/rplos-pubs-country/index.html b/_site/2014/12/rplos-pubs-country/index.html index 307f97d2d0..bc3e74b22e 100644 --- a/_site/2014/12/rplos-pubs-country/index.html +++ b/_site/2014/12/rplos-pubs-country/index.html @@ -191,27 +191,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2014/12/taxize-workflows/index.html b/_site/2014/12/taxize-workflows/index.html index 533ab40595..ba0bb4f0fa 100644 --- a/_site/2014/12/taxize-workflows/index.html +++ b/_site/2014/12/taxize-workflows/index.html @@ -264,27 +264,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/01/binomen/index.html b/_site/2015/01/binomen/index.html index 3dd6dece92..c013738521 100644 --- a/_site/2015/01/binomen/index.html +++ b/_site/2015/01/binomen/index.html @@ -228,27 +228,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/01/discourse-in-r/index.html b/_site/2015/01/discourse-in-r/index.html index d0410aa507..4491153338 100644 --- a/_site/2015/01/discourse-in-r/index.html +++ b/_site/2015/01/discourse-in-r/index.html @@ -235,27 +235,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/01/elasticsearch/index.html b/_site/2015/01/elasticsearch/index.html index af84c12cf6..25469f9d04 100644 --- a/_site/2015/01/elasticsearch/index.html +++ b/_site/2015/01/elasticsearch/index.html @@ -741,27 +741,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/01/geojson-topojson-io/index.html b/_site/2015/01/geojson-topojson-io/index.html index 11b6b1164a..7e06aacf37 100644 --- a/_site/2015/01/geojson-topojson-io/index.html +++ b/_site/2015/01/geojson-topojson-io/index.html @@ -385,27 +385,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/01/gistr-github-gists/index.html b/_site/2015/01/gistr-github-gists/index.html index d809a10621..a8a9844d6f 100644 --- a/_site/2015/01/gistr-github-gists/index.html +++ b/_site/2015/01/gistr-github-gists/index.html @@ -395,27 +395,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/01/httping/index.html b/_site/2015/01/httping/index.html index ebc56991ab..760fd55cd8 100644 --- a/_site/2015/01/httping/index.html +++ b/_site/2015/01/httping/index.html @@ -274,27 +274,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/02/elasticsearch-backup-restore/index.html b/_site/2015/02/elasticsearch-backup-restore/index.html index c667d2eacc..ff00aa16d3 100644 --- a/_site/2015/02/elasticsearch-backup-restore/index.html +++ b/_site/2015/02/elasticsearch-backup-restore/index.html @@ -57,7 +57,7 @@

    Recology

    Elasticsearch backup and restore

      - +   elasticsearch
    @@ -113,27 +113,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/02/package-dev/index.html b/_site/2015/02/package-dev/index.html index 4e7e57553e..b3d6ef43db 100644 --- a/_site/2015/02/package-dev/index.html +++ b/_site/2015/02/package-dev/index.html @@ -158,27 +158,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/02/secure-elasticsearch/index.html b/_site/2015/02/secure-elasticsearch/index.html index 9cc68fa1ec..faf4141a1d 100644 --- a/_site/2015/02/secure-elasticsearch/index.html +++ b/_site/2015/02/secure-elasticsearch/index.html @@ -114,27 +114,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/03/couch-dataframes/index.html b/_site/2015/03/couch-dataframes/index.html index 9b1f4419c1..af82f4038f 100644 --- a/_site/2015/03/couch-dataframes/index.html +++ b/_site/2015/03/couch-dataframes/index.html @@ -260,27 +260,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/03/csl-client/index.html b/_site/2015/03/csl-client/index.html index c882cd33b3..a0f4daf144 100644 --- a/_site/2015/03/csl-client/index.html +++ b/_site/2015/03/csl-client/index.html @@ -212,27 +212,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/03/faster-solr/index.html b/_site/2015/03/faster-solr/index.html index e3b6f2a9cb..9bbe619fd7 100644 --- a/_site/2015/03/faster-solr/index.html +++ b/_site/2015/03/faster-solr/index.html @@ -193,27 +193,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/04/geojson-io/index.html b/_site/2015/04/geojson-io/index.html index dfb59338b5..361c8ebe29 100644 --- a/_site/2015/04/geojson-io/index.html +++ b/_site/2015/04/geojson-io/index.html @@ -341,27 +341,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/04/the-new-way/index.html b/_site/2015/04/the-new-way/index.html index d94aff24a3..a3cf4f5562 100644 --- a/_site/2015/04/the-new-way/index.html +++ b/_site/2015/04/the-new-way/index.html @@ -286,27 +286,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/05/mow-the-lawn/index.html b/_site/2015/05/mow-the-lawn/index.html index 256aed7dda..99c6600895 100644 --- a/_site/2015/05/mow-the-lawn/index.html +++ b/_site/2015/05/mow-the-lawn/index.html @@ -446,27 +446,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/05/openadds/index.html b/_site/2015/05/openadds/index.html index b6a69cc6e1..1e0d41464e 100644 --- a/_site/2015/05/openadds/index.html +++ b/_site/2015/05/openadds/index.html @@ -239,27 +239,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/06/idigbio-in-spocc/index.html b/_site/2015/06/idigbio-in-spocc/index.html index fb930aa236..683430dda6 100644 --- a/_site/2015/06/idigbio-in-spocc/index.html +++ b/_site/2015/06/idigbio-in-spocc/index.html @@ -198,27 +198,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/06/rerddap/index.html b/_site/2015/06/rerddap/index.html index cf5bb6ca27..6f13e53207 100644 --- a/_site/2015/06/rerddap/index.html +++ b/_site/2015/06/rerddap/index.html @@ -379,27 +379,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/07/weather-data-with-rnoaa/index.html b/_site/2015/07/weather-data-with-rnoaa/index.html index 8850f41e38..3e7d83bc35 100644 --- a/_site/2015/07/weather-data-with-rnoaa/index.html +++ b/_site/2015/07/weather-data-with-rnoaa/index.html @@ -596,27 +596,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/08/full-text/index.html b/_site/2015/08/full-text/index.html index ff36a5257a..cd128b6431 100644 --- a/_site/2015/08/full-text/index.html +++ b/_site/2015/08/full-text/index.html @@ -507,27 +507,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/09/oai-client/index.html b/_site/2015/09/oai-client/index.html index 8b780697ec..c04c61d945 100644 --- a/_site/2015/09/oai-client/index.html +++ b/_site/2015/09/oai-client/index.html @@ -226,27 +226,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/10/analogsea-cran/index.html b/_site/2015/10/analogsea-cran/index.html index 26d6f66e9e..f3a3d320d8 100644 --- a/_site/2015/10/analogsea-cran/index.html +++ b/_site/2015/10/analogsea-cran/index.html @@ -143,27 +143,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/10/noaa-isd/index.html b/_site/2015/10/noaa-isd/index.html index 98a21e51be..8d8376ea8c 100644 --- a/_site/2015/10/noaa-isd/index.html +++ b/_site/2015/10/noaa-isd/index.html @@ -246,27 +246,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - Metrics for open source projects - 19 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/10/open-source-metrics/index.html b/_site/2015/10/open-source-metrics/index.html index c0412b876c..7be0f0681c 100644 --- a/_site/2015/10/open-source-metrics/index.html +++ b/_site/2015/10/open-source-metrics/index.html @@ -194,27 +194,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • diff --git a/_site/2015/11/crossref-clients/index.html b/_site/2015/11/crossref-clients/index.html index fc34724bad..758e0b69e2 100644 --- a/_site/2015/11/crossref-clients/index.html +++ b/_site/2015/11/crossref-clients/index.html @@ -260,27 +260,27 @@

    Related Posts

  • - - pygbif - GBIF client for Python - 12 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + pygbif - GBIF client for Python + 12 Nov 2015

  • - - Metrics for open source projects - 19 Oct 2015 + + noaa - Integrated Surface Database data + 21 Oct 2015

  • diff --git a/_site/2015/11/pygbif/index.html b/_site/2015/11/pygbif/index.html index bc726fe964..691c2879fb 100644 --- a/_site/2015/11/pygbif/index.html +++ b/_site/2015/11/pygbif/index.html @@ -215,27 +215,27 @@

    Related Posts

  • - - Crossref programmatic clients - 30 Nov 2015 + + binomen - Tools for slicing and dicing taxonomic names + 08 Dec 2015

  • - - noaa - Integrated Surface Database data - 21 Oct 2015 + + Crossref programmatic clients + 30 Nov 2015

  • - - Metrics for open source projects - 19 Oct 2015 + + noaa - Integrated Surface Database data + 21 Oct 2015

  • diff --git a/_site/2015/12/binomen-taxonomy-tools/index.html b/_site/2015/12/binomen-taxonomy-tools/index.html new file mode 100644 index 0000000000..7b4d6ee222 --- /dev/null +++ b/_site/2015/12/binomen-taxonomy-tools/index.html @@ -0,0 +1,420 @@ + + + + + + + + + + binomen - Tools for slicing and dicing taxonomic names · + Recology, R/etc. + + + + + + + + + + + + + + + + + + + + + +
    +
    +

    Recology

    + +

    R/etc.

    + +
    + + +
    +
    +
    + +
    +
    +

    binomen - Tools for slicing and dicing taxonomic names

      + +   + R taxonomy split-apply-combine
    + +  Source: .Rmd/.md +

    +

    The first version of binomen is now up on CRAN. It provides various taxonomic classes for defining a single taxon, multiple taxa, and a taxonomic data.frame. It is designed as a companion to taxize, where you can get taxonomic data on taxonomic names from the web.

    + +

    The classes (S3):

    + +
      +
    • taxon
    • +
    • taxonref
    • +
    • taxonrefs
    • +
    • binomial
    • +
    • grouping (i.e., classification - a different term is used to avoid a conflict with classification() in taxize)
    • +
    + +

    For example, the binomial class is defined by a genus, epithet, authority, and optional full species name and canonical version.

    +
    binomial("Poa", "annua", authority="L.")
    +
    <binomial>
    +  genus: Poa
    +  epithet: annua
    +  canonical:
    +  species:
    +  authority: L.
    +
    +
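
    The taxonref class is the analogous building block for a single rank/name pair plus an identifier and source link. A minimal sketch of constructing one by hand, assuming taxonref() takes rank, name, id, and uri arguments (the id and uri values here are placeholders, not real identifiers):

    +
    # a single rank/name reference; id and uri are made-up placeholder values
    taxonref(rank = "family", name = "Poaceae", id = "12345", uri = "http://example.org/poaceae")
    +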

    The package has a suite of functions to work on these taxonomic classes:

    + +
      +
    • gethier() - get hierarchy from a taxon class
    • +
    • scatter() - make each row in taxonomic data.frame (taxondf) a separate taxon object within a single taxa object
    • +
    • assemble() - make a taxa object into a taxondf data.frame
    • +
    • pick() - pick out one or more taxonomic groups
    • +
    • pop() - pop out (drop) one or more taxonomic groups
    • +
    • span() - pick a range between two taxonomic groups (inclusive)
    • +
    • strain() - filter by taxonomic groups, like dplyr's filter
    • +
    • name() - get the taxon name for each taxonref object
    • +
    • uri() - get the reference uri for each taxonref object
    • +
    • rank() - get the taxonomic rank for each taxonref object
    • +
    • id() - get the reference uri for each taxonref object
    • +
    + +

    The approach in this package is, I suppose, sort of like split-apply-combine from plyr/dplyr, but here the aim is to make that easy to do with taxonomic names.

    + +

    Install

    + +

    For the examples below, install and load the package:

    +
    install.packages("binomen")
    +
    library("binomen")
    +
    +
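
    If you'd rather run the in-development code than the CRAN release, the usual pattern is to install from GitHub (assuming the source lives in the rOpenSci organization as ropensci/binomen):

    +
    install.packages("devtools")
    devtools::install_github("ropensci/binomen")
    +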

    Make a taxon

    + +

    Make a taxon object

    +
    (obj <- make_taxon(genus="Poa", epithet="annua", authority="L.",
    +  family='Poaceae', clazz='Poales', kingdom='Plantae', variety='annua'))
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     kingdom: Plantae
    +#>     clazz: Poales
    +#>     family: Poaceae
    +#>     genus: Poa
    +#>     species: Poa annua
    +#>     variety: annua
    +
    +

    Index to various parts of the object

    + +

    The binomial

    +
    obj$binomial
    +#> <binomial>
    +#>   genus: Poa
    +#>   epithet: annua
    +#>   canonical: Poa annua
    +#>   species: Poa annua L.
    +#>   authority: L.
    +
    +

    The authority

    +
    obj$binomial$authority
    +#> [1] "L."
    +
    +

    The classification

    +
    obj$grouping
    +#> <grouping>
    +#>   kingdom: Plantae
    +#>   clazz: Poales
    +#>   family: Poaceae
    +#>   genus: Poa
    +#>   species: Poa annua
    +#>   variety: annua
    +
    +

    The family

    +
    obj$grouping$family
    +#> <taxonref>
    +#>   rank: family
    +#>   name: Poaceae
    +#>   id: none
    +#>   uri: none
    +
    +
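
    The taxonref accessors listed earlier (rank(), name(), id(), uri()) pull individual fields back out. A minimal sketch, assuming each can be applied directly to a single taxonref like the family reference above:

    +
    ref <- obj$grouping$family
    rank(ref)  # taxonomic rank, here "family"
    name(ref)  # taxon name, here "Poaceae"
    id(ref)    # reference id ("none" unless one was supplied)
    uri(ref)   # reference uri ("none" unless one was supplied)
    +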

    Subset taxon objects

    + +

    Get one or more ranks via pick()

    +
    obj %>% pick(family)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     family: Poaceae
    +obj %>% pick(family, genus)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     family: Poaceae
    +#>     genus: Poa
    +
    +

    Drop one or more ranks via pop()

    +
    obj %>% pop(family)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     kingdom: Plantae
    +#>     clazz: Poales
    +#>     genus: Poa
    +#>     species: Poa annua
    +#>     variety: annua
    +obj %>% pop(family, genus)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     kingdom: Plantae
    +#>     clazz: Poales
    +#>     species: Poa annua
    +#>     variety: annua
    +
    +

    Get a range of ranks via span()

    +
    obj %>% span(kingdom, family)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     kingdom: Plantae
    +#>     clazz: Poales
    +#>     family: Poaceae
    +
    +

    Extract classification as a data.frame

    +
    gethier(obj)
    +#>      rank      name
    +#> 1 kingdom   Plantae
    +#> 2   clazz    Poales
    +#> 3  family   Poaceae
    +#> 4   genus       Poa
    +#> 5 species Poa annua
    +#> 6 variety     annua
    +
    +

    Taxonomic data.frames

    + +

    Make one

    +
    df <- data.frame(order = c('Asterales','Asterales','Fagales','Poales','Poales','Poales'),
    +  family = c('Asteraceae','Asteraceae','Fagaceae','Poaceae','Poaceae','Poaceae'),
    +  genus = c('Helianthus','Helianthus','Quercus','Poa','Festuca','Holodiscus'),
    +  stringsAsFactors = FALSE)
    +(df2 <- taxon_df(df))
    +#>       order     family      genus
    +#> 1 Asterales Asteraceae Helianthus
    +#> 2 Asterales Asteraceae Helianthus
    +#> 3   Fagales   Fagaceae    Quercus
    +#> 4    Poales    Poaceae        Poa
    +#> 5    Poales    Poaceae    Festuca
    +#> 6    Poales    Poaceae Holodiscus
    +
    +

    Parse - get rank order via pick()

    +
    df2 %>% pick(order)
    +#>       order
    +#> 1 Asterales
    +#> 2 Asterales
    +#> 3   Fagales
    +#> 4    Poales
    +#> 5    Poales
    +#> 6    Poales
    +
    +

    Get ranks order, family, and genus via pick()

    +
    df2 %>% pick(order, family, genus)
    +#>       order     family      genus
    +#> 1 Asterales Asteraceae Helianthus
    +#> 2 Asterales Asteraceae Helianthus
    +#> 3   Fagales   Fagaceae    Quercus
    +#> 4    Poales    Poaceae        Poa
    +#> 5    Poales    Poaceae    Festuca
    +#> 6    Poales    Poaceae Holodiscus
    +
    +

    Get a range of ranks via span(), from rank X to rank Y

    +
    df2 %>% span(family, genus)
    +#>       family      genus
    +#> 1 Asteraceae Helianthus
    +#> 2 Asteraceae Helianthus
    +#> 3   Fagaceae    Quercus
    +#> 4    Poaceae        Poa
    +#> 5    Poaceae    Festuca
    +#> 6    Poaceae Holodiscus
    +
    +
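
    pop() and strain() should slot into the same workflow: pop() drops the named ranks instead of keeping them, and strain() filters rows like dplyr's filter. A sketch, assuming both accept a taxonomic data.frame the way pick() and span() do:

    +
    df2 %>% pop(order)                  # drop the order column, keeping family and genus
    df2 %>% strain(order == "Poales")   # keep only rows where order is Poales (assumed syntax)
    +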

    Separate each row into a taxon object (many taxon objects together make up a taxa object)

    +
    scatter(df2)
    +#> [[1]]
    +#> <taxon>
    +#>   binomial: Helianthus none
    +#>   grouping: 
    +#>     order: Asterales
    +#>     family: Asteraceae
    +#>     genus: Helianthus
    +#>     species: Helianthus none
    +#> 
    +#> [[2]]
    +#> <taxon>
    +#>   binomial: Helianthus none
    +#>   grouping: 
    +#>     order: Asterales
    +#>     family: Asteraceae
    +#>     genus: Helianthus
    +#>     species: Helianthus none
    +#> 
    +#> [[3]]
    +#> <taxon>
    +#>   binomial: Quercus none
    +#>   grouping: 
    +#>     order: Fagales
    +#>     family: Fagaceae
    +#>     genus: Quercus
    +#>     species: Quercus none
    +#> 
    +#> [[4]]
    +#> <taxon>
    +#>   binomial: Poa none
    +#>   grouping: 
    +#>     order: Poales
    +#>     family: Poaceae
    +#>     genus: Poa
    +#>     species: Poa none
    +#> 
    +#> [[5]]
    +#> <taxon>
    +#>   binomial: Festuca none
    +#>   grouping: 
    +#>     order: Poales
    +#>     family: Poaceae
    +#>     genus: Festuca
    +#>     species: Festuca none
    +#> 
    +#> [[6]]
    +#> <taxon>
    +#>   binomial: Holodiscus none
    +#>   grouping: 
    +#>     order: Poales
    +#>     family: Poaceae
    +#>     genus: Holodiscus
    +#>     species: Holodiscus none
    +#> 
    +#> attr(,"class")
    +#> [1] "taxa"
    +
    +

    And you can re-assemble a data.frame from the output of scatter() with assemble()

    +
    out <- scatter(df2)
    +assemble(out)
    +#>       order     family      genus         species
    +#> 1 Asterales Asteraceae Helianthus Helianthus none
    +#> 2 Asterales Asteraceae Helianthus Helianthus none
    +#> 3   Fagales   Fagaceae    Quercus    Quercus none
    +#> 4    Poales    Poaceae        Poa        Poa none
    +#> 5    Poales    Poaceae    Festuca    Festuca none
    +#> 6    Poales    Poaceae Holodiscus Holodiscus none
    +
    +
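
    Since assemble() undoes scatter(), the round trip can be written as a single pipe, which is where the split-apply-combine flavor mentioned above comes from:

    +
    df2 %>% scatter() %>% assemble()  # same result as assemble(out) above
    +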

    Thoughts?

    + +

    I'm really curious what people think of binomen. I'm not sure how useful this will be in the wild. Try it. Let me know. Thanks much :)

    + +
    + +
    + + +comments powered by Disqus + + + +
    + + + + + + +
    + + + + + +
    + + diff --git a/_site/archives/index.html b/_site/archives/index.html index 56c83e6dd0..a0996efd6e 100644 --- a/_site/archives/index.html +++ b/_site/archives/index.html @@ -55,1287 +55,1294 @@

    Recology

    -

    Archive (183 entries)  

    +

    Archive (184 entries)  

    + + + +
    -

    http codes +

    http codes 02 Dec 2014

    -

    sofa - reboot +

    sofa - reboot 18 Nov 2014

    -

    R resources +

    R resources 30 Jul 2013

    -

    R to GeoJSON +

    R to GeoJSON 30 Jun 2013

    -

    Is invasive? +

    Is invasive? 13 Dec 2012

    -

    Recology is 1 yr old - 23 Dec 2011

    +

    Recology is 1 yr old + 24 Dec 2011

    -

    A Data Visualization Book - 08 Sep 2011

    +

    A Data Visualization Book + 09 Sep 2011

    -

    FigShare Talk - 08 Sep 2011

    +

    FigShare Talk + 09 Sep 2011

    -

    Monday at #ESA11 - 08 Aug 2011

    +

    Monday at #ESA11 + 09 Aug 2011

    -

    iEvoBio 2011 Synopsis - 22 Jun 2011

    +

    iEvoBio 2011 Synopsis + 23 Jun 2011

    -

    google reader +

    google reader 12 May 2011

    -

    Processing nested lists - 28 Apr 2011

    +

    Processing nested lists + 29 Apr 2011

    -

    Bio-ORACLE +

    Bio-ORACLE 25 Mar 2011

    -

    RStudio - 28 Feb 2011

    +

    RStudio + 01 Mar 2011

    -

    Farmer's markets data - 16 Feb 2011

    +

    Farmer's markets data + 17 Feb 2011

    -

    R-bloggers +

    R-bloggers 05 Jan 2011

    -

    Ngram ecological terms - 29 Dec 2010

    +

    Ngram ecological terms + 30 Dec 2010

    diff --git a/_site/atom.xml b/_site/atom.xml index 7e26a71e26..df143cc377 100644 --- a/_site/atom.xml +++ b/_site/atom.xml @@ -4,7 +4,7 @@ Recology - 2015-12-05T08:53:27-08:00 + 2015-12-08T08:32:22+01:00 http://recology.info/ Scott Chamberlain @@ -12,10 +12,279 @@ + + binomen - Tools for slicing and dicing taxonomic names + + 2015-12-08T00:00:00+01:00 + http://recology.info//2015/12/binomen-taxonomy-tools + <p>The first version of <code>binomen</code> is now up on [CRAN][binomecran]. It provides various taxonomic classes for defining a single taxon, multiple taxa, and a taxonomic data.frame. It is designed as a companion to <a href="https://github.com/ropensci/taxize">taxize</a>, where you can get taxonomic data on taxonomic names from the web.</p> + +<p>The classes (S3):</p> + +<ul> +<li><code>taxon</code></li> +<li><code>taxonref</code></li> +<li><code>taxonrefs</code></li> +<li><code>binomial</code></li> +<li><code>grouping</code> (i.e., classification - used different term to avoid conflict with classification in <code>taxize</code>)</li> +</ul> + +<p>For example, the <code>binomial</code> class is defined by a genus, epithet, authority, and optional full species name and canonical version.</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">binomial<span class="p">(</span><span class="s">&quot;Poa&quot;</span><span class="p">,</span> <span class="s">&quot;annua&quot;</span><span class="p">,</span> authority<span class="o">=</span><span class="s">&quot;L.&quot;</span><span class="p">)</span> +</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r"><span class="o">&lt;</span>binomial<span class="o">&gt;</span> + genus<span class="o">:</span> Poa + epithet<span class="o">:</span> annua + canonical<span class="o">:</span> + species<span class="o">:</span> + authority<span class="o">:</span> L. 
+</code></pre></div> +<p>The package has a suite of functions to work on these taxonomic classes:</p> + +<ul> +<li><code>gethier()</code> - get hierarchy from a <code>taxon</code> class</li> +<li><code>scatter()</code> - make each row in taxonomic data.frame (<code>taxondf</code>) a separate <code>taxon</code> object within a single <code>taxa</code> object</li> +<li><code>assemble()</code> - make a <code>taxa</code> object into a <code>taxondf</code> data.frame</li> +<li><code>pick()</code> - pick out one or more taxonomic groups</li> +<li><code>pop()</code> - pop out (drop) one or more taxonomic groups</li> +<li><code>span()</code> - pick a range between two taxonomic groups (inclusive)</li> +<li><code>strain()</code> - filter by taxonomic groups, like dplyr&#39;s filter</li> +<li><code>name()</code> - get the taxon name for each <code>taxonref</code> object</li> +<li><code>uri()</code> - get the reference uri for each <code>taxonref</code> object</li> +<li><code>rank()</code> - get the taxonomic rank for each <code>taxonref</code> object</li> +<li><code>id()</code> - get the reference uri for each <code>taxonref</code> object</li> +</ul> + +<p>The approach in this package I suppose is sort of like <code>split-apply-combine</code> from <code>plyr</code>/<code>dplyr</code>, whereas this is aims to make it easy to do with taxonomic names.</p> + +<h2>Install</h2> + +<p>For examples below, you&#39;ll need the development version:</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">install.packages<span class="p">(</span><span class="s">&quot;binomen&quot;</span><span class="p">)</span> +</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r"><span class="kn">library</span><span class="p">(</span><span class="s">&quot;binomen&quot;</span><span class="p">)</span> +</code></pre></div> +<h2>Make a taxon</h2> + +<p>Make a taxon object</p> +<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="p">(</span>obj <span class="o">&lt;-</span> make_taxon<span class="p">(</span>genus<span class="o">=</span><span class="s">&quot;Poa&quot;</span><span class="p">,</span> epithet<span class="o">=</span><span class="s">&quot;annua&quot;</span><span class="p">,</span> authority<span class="o">=</span><span class="s">&quot;L.&quot;</span><span class="p">,</span> + family<span class="o">=</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span> clazz<span class="o">=</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span> kingdom<span class="o">=</span><span class="s">&#39;Plantae&#39;</span><span class="p">,</span> variety<span class="o">=</span><span class="s">&#39;annua&#39;</span><span class="p">))</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>Index to various parts of the object</p> + +<p>The binomial</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>binomial +<span class="c1">#&gt; &lt;binomial&gt;</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; epithet: annua</span> +<span class="c1">#&gt; canonical: Poa annua</span> +<span 
class="c1">#&gt; species: Poa annua L.</span> +<span class="c1">#&gt; authority: L.</span> +</code></pre></div> +<p>The authority</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>binomial<span class="o">$</span>authority +<span class="c1">#&gt; [1] &quot;L.&quot;</span> +</code></pre></div> +<p>The classification</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>grouping +<span class="c1">#&gt; &lt;grouping&gt;</span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>The family</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>grouping<span class="o">$</span>family +<span class="c1">#&gt; &lt;taxonref&gt;</span> +<span class="c1">#&gt; rank: family</span> +<span class="c1">#&gt; name: Poaceae</span> +<span class="c1">#&gt; id: none</span> +<span class="c1">#&gt; uri: none</span> +</code></pre></div> +<h2>Subset taxon objects</h2> + +<p>Get one or more ranks via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> pick<span class="p">(</span>family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; family: Poaceae</span> +obj <span class="o">%&gt;%</span> pick<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +</code></pre></div> +<p>Drop one or more ranks via <code>pop()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> pop<span class="p">(</span>family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +obj <span class="o">%&gt;%</span> pop<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>Get a range of ranks via <code>span()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> span<span class="p">(</span>kingdom<span class="p">,</span> family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span 
class="c1">#&gt; family: Poaceae</span> +</code></pre></div> +<p>Extract classification as a <code>data.frame</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">gethier<span class="p">(</span>obj<span class="p">)</span> +<span class="c1">#&gt; rank name</span> +<span class="c1">#&gt; 1 kingdom Plantae</span> +<span class="c1">#&gt; 2 clazz Poales</span> +<span class="c1">#&gt; 3 family Poaceae</span> +<span class="c1">#&gt; 4 genus Poa</span> +<span class="c1">#&gt; 5 species Poa annua</span> +<span class="c1">#&gt; 6 variety annua</span> +</code></pre></div> +<h2>Taxonomic data.frame&#39;s</h2> + +<p>Make one</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df <span class="o">&lt;-</span> <span class="kt">data.frame</span><span class="p">(</span>order <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Asterales&#39;</span><span class="p">,</span><span class="s">&#39;Asterales&#39;</span><span class="p">,</span><span class="s">&#39;Fagales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">),</span> + family <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Asteraceae&#39;</span><span class="p">,</span><span class="s">&#39;Asteraceae&#39;</span><span class="p">,</span><span class="s">&#39;Fagaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">),</span> + genus <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Helianthus&#39;</span><span class="p">,</span><span class="s">&#39;Helianthus&#39;</span><span class="p">,</span><span class="s">&#39;Quercus&#39;</span><span class="p">,</span><span class="s">&#39;Poa&#39;</span><span class="p">,</span><span class="s">&#39;Festuca&#39;</span><span class="p">,</span><span class="s">&#39;Holodiscus&#39;</span><span class="p">),</span> + stringsAsFactors <span class="o">=</span> <span class="kc">FALSE</span><span class="p">)</span> +<span class="p">(</span>df2 <span class="o">&lt;-</span> taxon_df<span class="p">(</span>df<span class="p">))</span> +<span class="c1">#&gt; order family genus</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus</span> +</code></pre></div> +<p>Parse - get rank order via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span class="o">%&gt;%</span> pick<span class="p">(</span><span class="kp">order</span><span class="p">)</span> +<span class="c1">#&gt; order</span> +<span class="c1">#&gt; 1 Asterales</span> +<span class="c1">#&gt; 2 Asterales</span> +<span class="c1">#&gt; 3 Fagales</span> +<span class="c1">#&gt; 4 Poales</span> +<span class="c1">#&gt; 5 Poales</span> +<span class="c1">#&gt; 6 Poales</span> +</code></pre></div> +<p>get ranks order, family, and genus via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span 
class="o">%&gt;%</span> pick<span class="p">(</span><span class="kp">order</span><span class="p">,</span> family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; order family genus</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus</span> +</code></pre></div> +<p>get range of names via <code>span()</code>, from rank <code>X</code> to rank <code>Y</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span class="o">%&gt;%</span> span<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; family genus</span> +<span class="c1">#&gt; 1 Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poaceae Poa</span> +<span class="c1">#&gt; 5 Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poaceae Holodiscus</span> +</code></pre></div> +<p>Separate each row into a <code>taxon</code> class (many <code>taxon</code> objects are a <code>taxa</code> class)</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">scatter<span class="p">(</span>df2<span class="p">)</span> +<span class="c1">#&gt; [[1]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Helianthus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Asterales</span> +<span class="c1">#&gt; family: Asteraceae</span> +<span class="c1">#&gt; genus: Helianthus</span> +<span class="c1">#&gt; species: Helianthus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[2]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Helianthus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Asterales</span> +<span class="c1">#&gt; family: Asteraceae</span> +<span class="c1">#&gt; genus: Helianthus</span> +<span class="c1">#&gt; species: Helianthus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[3]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Quercus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Fagales</span> +<span class="c1">#&gt; family: Fagaceae</span> +<span class="c1">#&gt; genus: Quercus</span> +<span class="c1">#&gt; species: Quercus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[4]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[5]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Festuca none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Festuca</span> +<span class="c1">#&gt; species: Festuca none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; 
[[6]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Holodiscus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Holodiscus</span> +<span class="c1">#&gt; species: Holodiscus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; attr(,&quot;class&quot;)</span> +<span class="c1">#&gt; [1] &quot;taxa&quot;</span> +</code></pre></div> +<p>And you can re-assemble a data.frame from the output of <code>scatter()</code> with <code>assemble()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">out <span class="o">&lt;-</span> scatter<span class="p">(</span>df2<span class="p">)</span> +assemble<span class="p">(</span>out<span class="p">)</span> +<span class="c1">#&gt; order family genus species</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus Helianthus none</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus Helianthus none</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus Quercus none</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa Poa none</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca Festuca none</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus Holodiscus none</span> +</code></pre></div> + + Crossref programmatic clients - 2015-11-30T00:00:00-08:00 + 2015-11-30T00:00:00+01:00 http://recology.info//2015/11/crossref-clients <p>I gave two talks recently at the annual <a href="http://www.crossref.org/annualmeeting/agenda.html">Crossref meeting</a>, one of which was a somewhat technical overview of programmatic clients for Crossref APIs. Check out the talk <a href="https://crossref.wistia.com/medias/8rh0jm5eda">here</a>. I talked about the motivation for working with Crossref data by writing code/etc. rather than going the GUI route, then went over the various clients, with brief examples.</p> @@ -196,7 +465,7 @@ cr_cn<span class="p">(</span>dois<span class="o&qu pygbif - GBIF client for Python - 2015-11-12T00:00:00-08:00 + 2015-11-12T00:00:00+01:00 http://recology.info//2015/11/pygbif <p>I maintain an R client for the GBIF API, at <a href="https://github.com/ropensci/rgbif">rgbif</a>. Been working on it for a few years, and recently been thinking that there should be a nice low level client for Python as well. I didn&#39;t see one searching Github, etc. so I started working on one recently: <a href="https://github.com/sckott/pygbif">pygbif</a></p> @@ -332,7 +601,7 @@ cr_cn<span class="p">(</span>dois<span class="o&qu noaa - Integrated Surface Database data - 2015-10-21T00:00:00-07:00 + 2015-10-21T00:00:00+02:00 http://recology.info//2015/10/noaa-isd <p>I&#39;ve recently made some improvements to the functions that work with ISD (Integrated Surface Database) data.</p> @@ -499,7 +768,7 @@ ggplot<span class="p">(</span>res_all<span class=" Metrics for open source projects - 2015-10-19T00:00:00-07:00 + 2015-10-19T00:00:00+02:00 http://recology.info//2015/10/open-source-metrics <p>Measuring use of open source software isn&#39;t always straightforward. 
The problem is especially acute for software targeted largely at academia, where usage is not measured just by software downloads, but also by citations.</p> @@ -614,7 +883,7 @@ ggplot<span class="p">(</span>res_all<span class=" analogsea - an R client for the Digital Ocean API - 2015-10-02T00:00:00-07:00 + 2015-10-02T00:00:00+02:00 http://recology.info//2015/10/analogsea-cran <p><code>analogsea</code> is now on CRAN. We started developing the pkg back in <a href="https://github.com/sckott/analogsea/commit/b129164dd87969d2fc6bcf3b51576fe1da932fdb">May 2014</a>, but just now getting the first version on CRAN. It&#39;s a collaboration with <a href="http://had.co.nz/">Hadley</a> and <a href="https://github.com/wch/">Winston Chang</a>.</p> @@ -678,7 +947,7 @@ creating a Digital Ocean account, authenticating, and have many examples.</p& oai - an OAI-PMH client - 2015-09-11T00:00:00-07:00 + 2015-09-11T00:00:00+02:00 http://recology.info//2015/09/oai-client <p><code>oai</code> is a general purpose client to work with any &#39;OAI-PMH&#39; service. The &#39;OAI-PMH&#39; protocol is described at <a href="http://www.openarchives.org/OAI/openarchivesprotocol.html">http://www.openarchives.org/OAI/openarchivesprotocol.html</a>. The main functions follow the OAI-PMH verbs: </p> @@ -825,7 +1094,7 @@ creating a Digital Ocean account, authenticating, and have many examples.</p& fulltext - a package to help you mine text - 2015-08-07T00:00:00-07:00 + 2015-08-07T00:00:00+02:00 http://recology.info//2015/08/full-text <p>Finally, we got <code>fulltext</code> up on CRAN - our first commit was <a href="https://github.com/ropensci/fulltext/commit/2d4f7e270040b2c8914853113073fc4d3134445e">May last year</a>. <code>fulltext</code> is a package to facilitate text mining. It focuses on open access journals. This package makes it easier to search for articles, download those articles in full text if available, convert pdf format to plain text, and extract text chunks for vizualization/analysis. We are planning to add bits for analysis in future versions. We&#39;ve been working on this package for a while now. It has a lot of moving parts and package dependencies, so it took a while to get a first useable version.</p> @@ -1253,7 +1522,7 @@ ggplot<span class="p">(</span>df<span class="p&quo rnoaa - Weather data in R - 2015-07-07T00:00:00-07:00 + 2015-07-07T00:00:00+02:00 http://recology.info//2015/07/weather-data-with-rnoaa <p>NOAA provides a lot of weather data, across many different websites under different project names. The R package <code>rnoaa</code> accesses many of these, including:</p> @@ -1770,7 +2039,7 @@ sp<span class="o">::</span>plot<span class="p" rerddap - General purpose R client for ERDDAP servers - 2015-06-24T00:00:00-07:00 + 2015-06-24T00:00:00+02:00 http://recology.info//2015/06/rerddap <p><a href="http://upwell.pfeg.noaa.gov/erddap/information.html">ERDDAP</a> is a data server that gives you a simple, consistent way to download subsets of gridded and tabular scientific datasets in common file formats and make graphs and maps. 
Besides it’s own <a href="http://upwell.pfeg.noaa.gov/erddap/rest.html">RESTful interface</a>, much of which is designed based on <a href="https://en.wikipedia.org/wiki/OPeNDAP">OPeNDAP</a>, ERDDAP can act as an OPeNDAP server and as a <a href="https://en.wikipedia.org/wiki/Web_Map_Service">WMS</a> server for gridded data.</p> @@ -2070,7 +2339,7 @@ ggplot<span class="p">(</span>df<span class="p&quo iDigBio - a new data source in spocc - 2015-06-08T00:00:00-07:00 + 2015-06-08T00:00:00+02:00 http://recology.info//2015/06/idigbio-in-spocc <p><a href="https://www.idigbio.org/">iDigBio</a>, or <em>Integrated Digitized Biocollections</em>, collects and provides access to species occurrence data, and associated metadata (e.g., images of specimens, when provided). They collect data from <a href="https://www.idigbio.org/portal/publishers">a lot of different providers</a>. They have a nice web interface for searching, check out <a href="https://www.idigbio.org/portal/search">idigbio.org/portal/search</a>. </p> @@ -2189,7 +2458,7 @@ leaflet<span class="p">(</span>data <span class="o openadds - open addresses client - 2015-05-18T00:00:00-07:00 + 2015-05-18T00:00:00+02:00 http://recology.info//2015/05/openadds <p><code>openadds</code> talks to <a href="http://openaddresses.io/">Openaddresses.io</a>. a run down of its things:</p> @@ -2349,7 +2618,7 @@ oa_combine<span class="p">(</span>x<span class="p& lawn - a new package to do geospatial analysis - 2015-05-18T00:00:00-07:00 + 2015-05-18T00:00:00+02:00 http://recology.info//2015/05/mow-the-lawn <p><code>lawn</code> is an R wrapper for the Javascript library <a href="http://turfjs.org/">turf.js</a> for advanced geospatial analysis. In addition, we have a few functions to interface with the <a href="https://github.com/mapbox/geojson-random">geojson-random</a> Javascript library.</p> @@ -2716,7 +2985,7 @@ lawn_extent<span class="p">(</span>dat<span class=" geojsonio - a new package to do geojson things - 2015-04-30T00:00:00-07:00 + 2015-04-30T00:00:00+02:00 http://recology.info//2015/04/geojson-io <p><code>geojsonio</code> converts geographic data to GeoJSON and TopoJSON formats - though the focus is mostly on GeoJSON </p> @@ -2978,7 +3247,7 @@ out <span class="o">&lt;-</span> topojson_read<span the new way - httsnap - 2015-04-29T00:00:00-07:00 + 2015-04-29T00:00:00+02:00 http://recology.info//2015/04/the-new-way <p>Inspired by <code>httpie</code>, a Python command line client as a sort of drop in replacement for <code>curl</code>, I am playing around with something similar-ish in R, at least in spirit. I started a little R pkg called <code>httsnap</code> with the following ideas:</p> @@ -3185,7 +3454,7 @@ out <span class="o">&lt;-</span> topojson_read<span Faster solr with csv - 2015-03-20T00:00:00-07:00 + 2015-03-20T00:00:00+01:00 http://recology.info//2015/03/faster-solr <p>With the <a href="https://github.com/ropensci/solr/issues/47">help of user input</a>, I&#39;ve tweaked <code>solr</code> just a bit to make things faster using default setings. I imagine the main interface for people using the <code>solr</code> R client is via <code>solr_search()</code>, which used to have <code>wt=json</code> by default. Changing this to <code>wt=csv</code> gives better performance. And it sorta makes sense to use csv, as the point of using an R client is probably do get data eventually into a data.frame, so it makes sense to go csv format (Already in tabular format) if it&#39;s faster too.</p> @@ -3299,7 +3568,7 @@ stats, etc. 
</p> PUT dataframes on your couch - 2015-03-12T00:00:00-07:00 + 2015-03-12T00:00:00+01:00 http://recology.info//2015/03/couch-dataframes <p>It would be nice to easily push each row or column of a data.frame into CouchDB instead of having to prepare them yourself into JSON, then push in to couch. I recently added ability to push data.frame&#39;s into couch using the normal <code>PUT /{db}</code> method, and added support for the couch bulk API.</p> @@ -3480,7 +3749,7 @@ out<span class="p">[</span><span class="m"> csl - an R client for Citation Style Language data - 2015-03-11T00:00:00-07:00 + 2015-03-11T00:00:00+01:00 http://recology.info//2015/03/csl-client <p>CSL (Citation Style Language) is used quite widely now to specify citations in a standard fashion. <code>csl</code> is an R client for exploring CSL styles, and is inspired by the Ruby gem <a href="https://github.com/inukshuk/csl-ruby">csl</a>. For example, csl is given back in the PLOS Lagotto article level metric API (follow <a href="http://alm.plos.org/api/v5/articles?ids=10.1371%252Fjournal.pone.0025110&amp;info=detail&amp;source_id=crossref">http://alm.plos.org/api/v5/articles?ids=10.1371%252Fjournal.pone.0025110&amp;info=detail&amp;source_id=crossref</a>).</p> @@ -3613,7 +3882,7 @@ style_exists<span class="p">(</span><span class="s Elasticsearch backup and restore - 2015-02-26T21:00:00-08:00 + 2015-02-27T06:00:00+01:00 http://recology.info//2015/02/elasticsearch-backup-restore <h2>setup backup</h2> <div class="highlight"><pre><code class="language-text" data-lang="text">curl -XPUT &#39;http://localhost:9200/_snapshot/my_backup/&#39; -d &#39;{ @@ -3647,7 +3916,7 @@ style_exists<span class="p">(</span><span class="s note to self, secure elasticsearch - 2015-02-26T10:00:00-08:00 + 2015-02-26T19:00:00+01:00 http://recology.info//2015/02/secure-elasticsearch <p>Recently I spun up a box on a cloud hosting provider planning to make a tens of thousdands of queries to an Elasticsearch instance on the same box. I could have done this on my own machine, but didn&#39;t want to take up compute resources.</p> @@ -3682,7 +3951,7 @@ style_exists<span class="p">(</span><span class="s Package development - 2015-02-14T00:00:00-08:00 + 2015-02-14T00:00:00+01:00 http://recology.info//2015/02/package-dev <p>Someone asked recently about tips for package development workflow to optimize a successful submission to CRAN.</p> @@ -3761,7 +4030,7 @@ style_exists<span class="p">(</span><span class="s httping - ping and time http requests - 2015-01-30T00:00:00-08:00 + 2015-01-30T00:00:00+01:00 http://recology.info//2015/01/httping <p>I&#39;ve been working on a little thing called <code>httping</code> - a small R package that started as a pkg to Ping urls and time requests. It&#39;s a port of the Ruby gem <a href="https://github.com/jpignata/httping">httping</a>. 
The <code>httr</code> package is in <code>Depends</code> in this package, so its functions can be called directly, without having to load <code>httr</code> explicitly yourself.</p> @@ -3956,7 +4225,7 @@ devtools<span class="o">::</span>install_github<span cl elastic - Elasticsearch from R - 2015-01-29T00:00:00-08:00 + 2015-01-29T00:00:00+01:00 http://recology.info//2015/01/elasticsearch <p>We&#39;ve (ropensci) been working on an R client for interacting with <a href="http://www.elasticsearch.org/">Elasticsearch</a> for a while now, first commit was November 2013.</p> @@ -4618,7 +4887,7 @@ hits <span class="o">&lt;-</span> <span class=" binomen - taxonomic classes and parsing - 2015-01-19T00:00:00-08:00 + 2015-01-19T00:00:00+01:00 http://recology.info//2015/01/binomen <p>I maintain, along with other <a href="https://github.com/ropensci/taxize/graphs/contributors">awesome people</a>, the <a href="https://github.com/ropensci/taxize">taxize</a> R package - a taxonomic toolbelt for R, for interacting with taxonomic data sources on the web. </p> @@ -4767,7 +5036,7 @@ devtools<span class="o">::</span>install_github<span cl discgolf - Dicourse from R - 2015-01-15T00:00:00-08:00 + 2015-01-15T00:00:00+01:00 http://recology.info//2015/01/discourse-in-r <p><a href="http://www.discourse.org/">Discourse</a> is a great discussion forum application. It&#39;s another thing from <a href="https://en.wikipedia.org/wiki/Jeff_Atwood">Jeff Atwood</a>, the co-founder of <a href="http://stackoverflow.com/">Stackoverflow/Stackexchange</a>. The installation is epecially easy with their dockerized installation setup on DigitalOcean ([instructions here][https://www.digitalocean.com/community/tutorials/how-to-install-discourse-on-ubuntu-14-04]). </p> @@ -4923,7 +5192,7 @@ create_topic<span class="p">(</span>title<span class=&q R I/O for geojson and topojson - 2015-01-06T00:00:00-08:00 + 2015-01-06T00:00:00+01:00 http://recology.info//2015/01/geojson-topojson-io <p>At rOpenSci we&#39;ve been working on an R package (<code>geojsonio</code>) to make converting R data in various formats to <a href="http://geojson.org/geojson-spec.html">geoJSON</a> and <a href="https://github.com/topojson/topojson-specification/blob/master/README.md">topoJSON</a>, and vice versa. We hope to do this one job very well, and handle all reasonable use cases.</p> @@ -5229,7 +5498,7 @@ validate<span class="p">(</span>x<span class="p&qu gistr - R client for GitHub gists - 2015-01-05T00:00:00-08:00 + 2015-01-05T00:00:00+01:00 http://recology.info//2015/01/gistr-github-gists <p>GitHub has this site <a href="https://gist.github.com/">https://gist.github.com/</a> in which we can share code, text, images, maps, plots, etc super easily, without having to open up a repo, etc. GitHub gists are a great way to throw up an example use case to show someone, or show code that&#39;s throwing errors to a support person, etc. In addition, there&#39;s API access, which means we can interact with Gists not just from their web interface, but from the command line, or any programming language. There are clients for <a href="https://github.com/mbostock/gistup">Node.js</a>, <a href="https://rubygems.org/gems/gist">Ruby</a>, <a href="https://pypi.python.org/pypi/gists/0.4.6">Python</a>, and on and on. But AFAIK there wasn&#39;t one for R. Along with <a href="https://github.com/ramnathv">Ramnath</a> and others, we&#39;ve been working on an R client for gists. <code>v0.1</code> is <a href="http://cran.r-project.org/web/packages/gistr/index.html">now on CRAN</a>. 
Below is an overview. </p> @@ -5545,7 +5814,7 @@ gist_create<span class="p">(</span>code <span class=&qu pytaxize - low level ITIS functions - 2014-12-26T00:00:00-08:00 + 2014-12-26T00:00:00+01:00 http://recology.info//2014/12/pytaxize-itis <p>I&#39;ve been working on a Python port of the R package <code>taxize</code> that I maintain. It&#39;s still early days with this Python library, I&#39;d love to know what people think. For example, I&#39;m giving back Pandas DataFrame&#39;s from most functions. Does this make sense?</p> @@ -5665,7 +5934,7 @@ gist_create<span class="p">(</span>code <span class=&qu Museum metadata - the Asian Art Museum of San Francisco - 2014-12-10T00:00:00-08:00 + 2014-12-10T00:00:00+01:00 http://recology.info//2014/12/museum-aamsf <p>I was in San Francisco last week for an altmetrics conference at PLOS. While there, I visited the <a href="http://www.asianart.org/">Asian Art Museum</a>, just the <a href="http://www.asianart.org/exhibitions_index/roads-of-arabia">Roads of Arabia exhibition</a>.</p> @@ -5778,7 +6047,7 @@ devtools<span class="o">::</span>install_github<span cl icanhaz altmetrics - 2014-12-08T11:45:29-08:00 + 2014-12-08T20:45:29+01:00 http://recology.info//2014/12/icanhaz-altmetrics <p>The Lagotto application is a Rails app that collects and serves up via RESTful API article level metrics data for research objects. So far, this application has only been applied to scholarly articles, but will <a href="http://articlemetrics.github.io/MDC/">see action on datasets soon</a>. </p> @@ -5922,7 +6191,7 @@ Alm.alm<span class="p">(</span>ids<span class="o&q Dealing with multi handle errors - 2014-12-08T00:00:00-08:00 + 2014-12-08T00:00:00+01:00 http://recology.info//2014/12/multi-handle <p>At rOpenSci we occasssionally hear from our users that they run into an error like:</p> <div class="highlight"><pre><code class="language-r" data-lang="r">Error <span class="kr">in</span> <span class="kr">function</span> <span class="p">(</span>type<span class="p">,</span> msg<span class="p">,</span> asError <span class="o">=</span> <span class="kc">TRUE</span><span class="p">)</span> <span class="o">:</span> @@ -5982,7 +6251,7 @@ devtools<span class="o">::</span>install_github<span cl Altmetrics from anywhere - 2014-12-08T00:00:00-08:00 + 2014-12-08T00:00:00+01:00 http://recology.info//2014/12/altmetrics-anywhere <p>The Lagotto application is a Rails app that collects and serves up via RESTful API article level metrics data for research objects. So far, this application has only been applied to scholarly articles, but will <a href="http://articlemetrics.github.io/MDC/">see action on datasets soon</a>. </p> @@ -6126,7 +6395,7 @@ Alm.alm<span class="p">(</span>ids<span class="o&q Publications by author country - 2014-12-03T00:00:00-08:00 + 2014-12-03T00:00:00+01:00 http://recology.info//2014/12/rplos-pubs-country <p>I just missed another chat on the rOpenSci website:</p> @@ -6238,7 +6507,7 @@ ggplot<span class="p">(</span>df<span class="p&quo http codes - 2014-12-02T08:01:50-08:00 + 2014-12-02T17:01:50+01:00 http://recology.info//2014/12/http-codes <p>Recently noticed a little Python library called <a href="https://github.com/rspivak/httpcode">httpcode</a> that does a simple thing: gives information on http codes in the CLI. I thought this could maybe potentially be useful for R. So I made an R version. 
</p> @@ -6443,7 +6712,7 @@ ggplot<span class="p">(</span>df<span class="p&quo taxize workflows - 2014-12-02T00:00:00-08:00 + 2014-12-02T00:00:00+01:00 http://recology.info//2014/12/taxize-workflows <p>A missed chat on the rOpenSci website the other day asked:</p> @@ -6628,7 +6897,7 @@ dat_new <span class="o">&lt;-</span> tbl_df<span cl 1000 commits to taxize - 2014-11-28T00:00:00-08:00 + 2014-11-28T00:00:00+01:00 http://recology.info//2014/11/taxize-1000 <p>Just today we&#39;ve hit 1000 commits on <code>taxize</code>! <code>taxize</code> is an R client to search across lots of taxonomic databases on the web. In honor of the 1000 commit milestone, here&#39;s some stats on the project.</p> @@ -6804,7 +7073,7 @@ out <span class="o">%&gt;%</span> Intro to alpha ckanr - R client for CKAN RESTful API - 2014-11-26T03:42:36-08:00 + 2014-11-26T12:42:36+01:00 http://recology.info//2014/11/ckanr-intro <p>Recently I had need to create a client for scraping museum metadata to help out some folks that use that kind of data. It&#39;s called <a href="https://github.com/ropensci/musemeta">musemeta</a>. One of the data sources in that package uses the open source <em>data portal software</em> <a href="http://ckan.org/">CKAN</a>, and so we can interact with <a href="http://docs.ckan.org/en/latest/api/index.html">the CKAN API</a> to get data. Since many groups can use CKAN API/etc infrastucture because it&#39;s open source, I thought why not have a general purpose R client for this, since <a href="https://github.com/ckan/ckan/wiki/CKAN-API-Clients">there are other clients</a> for Python, PHP, Ruby, etc. </p> @@ -7169,7 +7438,7 @@ out<span class="p">[,</span> <span class="o"& Fun with the GitHub API - 2014-11-26T00:00:00-08:00 + 2014-11-26T00:00:00+01:00 http://recology.info//2014/11/github-fun <p>Recently I&#39;ve had fun playing with the GitHub API, and here are some notes to self about this fun having.</p> @@ -7313,7 +7582,7 @@ has_term<span class="p">(</span><span class="s&quo sofa - reboot - 2014-11-18T00:00:00-08:00 + 2014-11-18T00:00:00+01:00 http://recology.info//2014/11/sofa <p>I&#39;ve reworked <code>sofa</code> recently after someone reported a bug in the package. Since the last post on this package on 2013-06-21, there&#39;s a bunch of changes:</p> @@ -7594,7 +7863,7 @@ db_list<span class="p">(</span>config<span class=" Conditionality meta-analysis data - 2014-10-06T00:00:00-07:00 + 2014-10-06T00:00:00+02:00 http://recology.info//2014/10/conditionality-meta-analysis <h2>The paper</h2> @@ -7723,7 +7992,7 @@ fs_make_public<span class="p">(</span>figid<span class= rsunlight - R client for Sunlight Labs APIs - 2014-08-11T00:00:00-07:00 + 2014-08-11T00:00:00+02:00 http://recology.info//2014/08/rsunlight <p>My <a href="http://recology.info/2014/05/rsunlight/">last blog post on this package</a> was so long ago the package wrapped both New York Times APIs and Sunlight Labs APIs and the package was called <code>govdat</code>. I split that package up into <code>rsunlight</code> for Sunlight Labs APIs and <code>rtimes</code> for some New York Times APIs. 
<code>rtimes</code> is <a href="https://github.com/ropengov/rtimes">in development at Github</a>.</p> @@ -7971,7 +8240,7 @@ ggplot<span class="p">(</span>res<span class="p&qu analogsea - v0.1 notes - 2014-06-18T00:00:00-07:00 + 2014-06-18T00:00:00+02:00 http://recology.info//2014/06/analogsea-v01 <p>My <a href="http://recology.info/2014/05/analogsea/">last blog </a> post introduced the R package I&#39;m working on <code>analogsea</code>, an R client for the Digital Ocean API.</p> @@ -8251,7 +8520,7 @@ droplets<span class="p">(</span><span class="m&quo analogsea - an R client for the Digital Ocean API - 2014-05-28T00:00:00-07:00 + 2014-05-28T00:00:00+02:00 http://recology.info//2014/05/analogsea <p>I think this package name is my best yet. Maybe it doesn&#39;t make sense though? At least it did at the time...</p> @@ -8417,7 +8686,7 @@ droplets<span class="p">(</span><span class="m&quo Logistic plot reboot - 2014-05-22T00:00:00-07:00 + 2014-05-22T00:00:00+02:00 http://recology.info//2014/05/logplotreboot <p>Someone asked about plotting something like this today</p> @@ -8538,7 +8807,7 @@ droplets<span class="p">(</span><span class="m&quo cowsay - ascii messages and warnings for R - 2014-02-20T00:00:00-08:00 + 2014-02-20T00:00:00+01:00 http://recology.info//2014/02/cowsay <h2>The history</h2> @@ -8752,7 +9021,7 @@ install_github<span class="p">(</span><span class=" cites - citation stuff from the command line - 2014-01-18T00:00:00-08:00 + 2014-01-18T00:00:00+01:00 http://recology.info//2014/01/cites <p>I&#39;ve been learning Ruby, and decided to scratch an itch: getting citations for papers to put in a bibtex file or my Zotero library. This usually requires two parts: 1) searching for an article with keywords, and then 2) getting the citation once the paper is found. Since I am lazy, I would prefer to do this from the command line instead of opening up a browser. Thus =&gt; <code>cites</code>. (Note, I&#39;m sure someone has created something better - the point is I&#39;m learnin&#39; me some Ruby) <br><br> @@ -8892,7 +9161,7 @@ ER - rgauges - fun with hourly web site analytics - 2014-01-17T00:00:00-08:00 + 2014-01-17T00:00:00+01:00 http://recology.info//2014/01/rgauges-hourly <p><a href="http://get.gaug.es/">Gaug.es</a> is a really nice looking analytics platform as an alternative to Google Analytics. It is a paid service, but not that expensive really. </p> @@ -9176,7 +9445,7 @@ ggplot<span class="p">(</span>oneday<span class="p Jekyll - an intro - 2013-11-20T00:00:00-08:00 + 2013-11-20T00:00:00+01:00 http://recology.info//2013/11/jekyll-intro <p>I started using Jekyll when I didn&#39;t really know HTML, CSS, or Ruby - so I&#39;ve had to learn a lot - but using Jekyll has been a great learning experience for all those languages. </p> @@ -9299,7 +9568,7 @@ My second blog post!</code></pre></figure> Code display in scholarly journals - 2013-10-25T00:00:00-07:00 + 2013-10-25T00:00:00+02:00 http://recology.info//2013/10/codeinpapers <p>Code in journals, that is, code you would type to do some programmatic operation in say R or Python, is kind of a mess to say the least. Okay, so you can <strong>SEE</strong> code in papers, but code is not formatted in a way that facilites reuse. If an author in a paper writes out some code for software they create, or an analysis they do in the paper, wouldn&#39;t it be nice for a reader to be able to copy and paste that code directly into whatever environment that code should execute in, and actually work. Of course there is dependencies, etc. 
for that software to worry about, but here I am just concerned with the code formatting in articles. Code is displayed as an image in some cases (gasp!). Additionally, there&#39;s this thing called the internet, and we can use color, so let&#39;s highlight code already. At least in one of our recent <a href="http://ropensci.org/">rOpenSci</a> papers in F1000 Research, <a href="http://f1000research.com/articles/2-191/v1">they do use syntax highlighting</a> - w00t!</p> @@ -9312,7 +9581,7 @@ My second blog post!</code></pre></figure> Guide to using rOpenSci packages during the US Gov't shutdown - 2013-10-08T00:00:00-07:00 + 2013-10-08T00:00:00+02:00 http://recology.info//2013/10/shutdown <p><em>Note: This is cross-posted from the <a href="http://ropensci.org/blog">rOpenSci blog</a>, which will update with this post when our technical snafu is fixed.</em></p> @@ -9400,7 +9669,7 @@ For those wanting to get NOAA climate data, perhaps check out the <a href=&qu Taxonomy data from the web in three languages - 2013-09-27T00:00:00-07:00 + 2013-09-27T00:00:00+02:00 http://recology.info//2013/09/taxonomy-in-three-acts <p>Eduard Szöcs and I started developing a taxonomic toolbelt for the R language a while back , which lets you interact with a multitude of taxonomic databases on the web. We have a paper in F1000Research if you want to find out more (see <a href="http://f1000research.com/articles/2-191/v1">here</a>).</p> @@ -9490,7 +9759,7 @@ python setup.py install</code></pre></figure> Pollinator niche breadth and natural enemies - 2013-09-19T00:00:00-07:00 + 2013-09-19T00:00:00+02:00 http://recology.info//2013/09/natenemies <p>I am on my way out of academia, so I want to share what I won&#39;t ever get around to finishing. I started a paper many years ago examining the prevalence of natural enemy pressure on pollinators, and patterns of occurrence of pollinator natural enemies in relation to plant attributes. </p> @@ -9505,7 +9774,7 @@ python setup.py install</code></pre></figure> govdat - SunlightLabs and New York Times Congress data via R - 2013-08-28T00:00:00-07:00 + 2013-08-28T00:00:00+02:00 http://recology.info//2013/08/govdat-vignette <p>I started an R package a while back, and a few people have shown interest, so I thought it was time to revist the code. govdat is an interface to various APIs for government data: currently the Sunlight Labs APIs, and the New York Times congress API. Returned objects from functions are simple lists. In future versions of govdat, I may change how data is returned. The following are examples (which is also the package vignette) of using the Sunlight Labs API. I will add examples of using the New York Times Congress API once their site is up again; I&#39;m doing this on 2013-08-28, just after the takedown of their site.</p> @@ -9820,7 +10089,7 @@ ldply<span class="p">(</span>out<span class="p&quo Engaging the public on climate change through phenology data - 2013-08-18T00:00:00-07:00 + 2013-08-18T00:00:00+02:00 http://recology.info//2013/08/phenology <h2>ScienceOnline Climate</h2> @@ -9845,7 +10114,7 @@ ldply<span class="p">(</span>out<span class="p&quo Working with climate data from the web in R - 2013-08-17T00:00:00-07:00 + 2013-08-17T00:00:00+02:00 http://recology.info//2013/08/sciordata <p>I recently attended <a href="http://scioclimate.wikispaces.com">ScienceOnline Climate</a>, a conference in Washington, D.C. at AAAS. 
You may have heard of the <a href="https://twitter.com/#sciox">ScienceOnline annual meeting in North Carolina</a> - this was one of their topical meetings focused on Climate Change. I moderated a session on <a href="http://scioclimate.wikispaces.com/3W.+Working+With+Science+Data+From+Around+The+Web">working with data from the web in R</a>, focusing on climate data. Search Twitter for #scioClimate for tweets from the conference, and #sciordata for tweets from the session I ran. The following is an abbreviated demo of what I did in the workshop showing some of what you can do with climate data in R using our packages.</p> @@ -9995,7 +10264,7 @@ ggplot<span class="p">(</span>df<span class="p&quo R ecology workshop - 2013-07-31T00:00:00-07:00 + 2013-07-31T00:00:00+02:00 http://recology.info//2013/07/r-ecology-workshop <p>After <a href="http://sckott.github.io/2013/07/r-resources/">my presentation yesterday</a> to a group of grad students on R resources, I did a presentation today on intro to R data manipulation, visualizations, and analyses/visualizations of biparite networks and community level analyses (diversity, rarefaction, ordination, etc.). As I said <a href="http://sckott.github.io/2013/07/r-resources/">yesterday</a> I&#39;ve been playing with two ways to make reproducible presentations in R: <a href="http://www.rstudio.com/ide/docs/presentations/overview">RStudio&#39;s presentations</a> built in to RStudio IDE, and <a href="http://slidify.org/">Slidify</a>. Yesterday I went with RStudio&#39;s product - today I used Slidify. See the Markdown file for the presentation <a href="https://github.com/sckott/posterstalks/blob/gh-pages/sfu/resources/r_resources.Rpres">here</a>. </p> @@ -10012,7 +10281,7 @@ ggplot<span class="p">(</span>df<span class="p&quo R resources - 2013-07-30T00:00:00-07:00 + 2013-07-30T00:00:00+02:00 http://recology.info//2013/07/r-resources <p>I&#39;m doing a presentation today to grad students on R resources. I have been writing HTML presentations recently, but some great tools are now available to convert text that is easy to read and write to presentations. </p> @@ -10036,7 +10305,7 @@ ggplot<span class="p">(</span>df<span class="p&quo Beyond academia - 2013-07-25T00:00:00-07:00 + 2013-07-25T00:00:00+02:00 http://recology.info//2013/07/beyond-academia <p>As ecologists, we often start graduate school worshiping the ivory tower of academia with its freedom to pursue important ecological questions. However, studies have shown that most of us do not end up in academia. Greater numbers of ecology graduates are leaving the ivory tower for non-academic career paths. But for many graduates, moving from an academic environment to a non-academic job may be difficult. In graduate school we are trained to work in a particular way, often with loose deadlines and unlimited intellectual freedom (within reason of course). The culture and expectations of the non-academic world may be quite different. What are the skills that you need in a government job, or in science journalism? How do you market yourself for a non-academic position? This is a timely topic because funding to academic ecologists is being cut, leaving fewer opportunities in the academic arena. 
In fact, an ESA Student Section survey found that an ESA 2013 session on non-academic career paths in ecology was the topic of greatest interest.</p> @@ -10069,7 +10338,7 @@ ggplot<span class="p">(</span>df<span class="p&quo On writing, sharing, collaborating, and hosting code for science - 2013-07-20T00:00:00-07:00 + 2013-07-20T00:00:00+02:00 http://recology.info//2013/07/code <p>I recently engaged with a number of tweeps in response to my tweet:</p> @@ -10132,7 +10401,7 @@ ggplot<span class="p">(</span>df<span class="p&quo R to GeoJSON - 2013-06-30T00:00:00-07:00 + 2013-06-30T00:00:00+02:00 http://recology.info//2013/06/geojson <p><strong>UPDATE</strong> As you can see in Patrick&#39;s comment below you can convert to GeoJSON format files with rgdal as an alternative to calling the Ogre web API described below. See <a href="https://github.com/patperu/write2geojson/blob/master/write-geojson.R">here</a> for example code for converting to GeoJSON with rgdal.</p> @@ -10231,7 +10500,7 @@ git push</code></pre></figure> Put some cushions on the sofa - 2013-06-21T00:00:00-07:00 + 2013-06-21T00:00:00+02:00 http://recology.info//2013/06/sofa <p>I posted earlier this week about sofa (<a href="http://sckott.github.io/2013/06/couch/">here</a>), introducing a package I started recently that interacts with CouchDB from R. There&#39;s been a fair amount of response at least in terms of page views, so I&#39;ll take that as a sign to keep going. </p> @@ -10394,7 +10663,7 @@ $rev Coffeehouse - an aggregator for blog posts about data, data management, etc. - 2013-06-18T00:00:00-07:00 + 2013-06-18T00:00:00+02:00 http://recology.info//2013/06/coffeehouse <p>Have you heard of <a href="http://www.dataone.org/">DataONE</a>? It stands for the Data Observation Network for Earth, and I am involved in the <a href="http://www.dataone.org/working_groups/community-education-and-engagement">Community Education and Engagement working group</a> at DataONE. We try to communicate about data, data management, and similar things to scientists and other DataONE <em>stakeholders</em>. </p> @@ -10434,7 +10703,7 @@ $rev Stashing and playing with raw data locally from the web - 2013-06-17T00:00:00-07:00 + 2013-06-17T00:00:00+02:00 http://recology.info//2013/06/couch <p>It is getting easier to get data directly into R from the web. Often R packages that retrieve data from the web return useful R data structures to users like a data.frame. This is a good thing of course to make things user friendly. </p> @@ -10714,7 +10983,7 @@ $views$foo Fylopic, an R wrapper to Phylopic - 2013-06-01T00:00:00-07:00 + 2013-06-01T00:00:00+02:00 http://recology.info//2013/06/fylopic <h2>What is PhyloPic?</h2> @@ -10847,7 +11116,7 @@ qplot<span class="p">(</span>x <span class="o" BISON USGS species occurrence data - 2013-05-27T00:00:00-07:00 + 2013-05-27T00:00:00+02:00 http://recology.info//2013/05/rbison <p>The USGS recently released a way to search for and get species occurrence records for the USA. The service is called <a href="http://bison.usgs.ornl.gov/">BISON</a> (Biodiversity Information Serving Our Nation). The service has <a href="http://bison.usgs.ornl.gov/">a web interface</a> for human interaction in a browser, and <a href="http://bison.usgs.ornl.gov/services.html">two APIs</a> (application programming interface) to allow machines to interact with their database. One of the APIs allows you to search and retrieve data, and the other gives back maps as either a heatmap or a species occurrence map. 
The latter is more appropriate for working in a browser, so I&#39;ll leave that to the web app folks. </p> @@ -10989,7 +11258,7 @@ bisonmap<span class="p">(</span>input <span class=" Scholarly metadata in R - 2013-03-16T00:00:00-07:00 + 2013-03-16T00:00:00+01:00 http://recology.info//2013/03/r-metadata <p>Scholarly metadata - the meta-information surrounding articles - can be super useful. Although metadata does not contain the full content of articles, it contains a lot of useful information, including title, authors, abstract, URL to the article, etc. </p> @@ -11292,7 +11561,7 @@ ggplot<span class="p">(</span>toplot_<span class=" Visualizing rOpenSci collaboration - 2013-03-08T00:00:00-08:00 + 2013-03-08T00:00:00+01:00 http://recology.info//2013/03/ropensci-collaboration <p>We (<a href="http://ropensci.org/">rOpenSci</a>) have been writing code for R packages for a couple years, so it is time to take a look back at the data. What data you ask? The commits data from GitHub ~ data that records who did what and when. </p> @@ -11409,7 +11678,7 @@ plotweb<span class="p">(</span>sortweb<span class=" Academia reboot - 2013-02-22T00:00:00-08:00 + 2013-02-22T00:00:00+01:00 http://recology.info//2013/02/academia-reboot <h2>Reboot</h2> @@ -11459,7 +11728,7 @@ plotweb<span class="p">(</span>sortweb<span class=" Getting a simple tree via NCBI - 2013-02-14T00:00:00-08:00 + 2013-02-14T00:00:00+01:00 http://recology.info//2013/02/common-tree <p>I was just at the <a href="http://www.evoio.org/wiki/Phylotastic">Phylotastic hackathon</a> in Tucson, AZ at the <a href="http://www.iplantcollaborative.org/">iPlant</a> facilities at the UofA.</p> @@ -11531,7 +11800,7 @@ plot<span class="p">(</span>tree2<span class="p&qu testing ifttt recipe, ignore - 2013-01-26T00:00:00-08:00 + 2013-01-26T00:00:00+01:00 http://recology.info//2013/01/ifttt-test <p>testing ifttt recipe</p> @@ -11540,7 +11809,7 @@ plot<span class="p">(</span>tree2<span class="p&qu Waiting for an API request to complete - 2013-01-26T00:00:00-08:00 + 2013-01-26T00:00:00+01:00 http://recology.info//2013/01/api-token <h3>Dealing with API tokens in R</h3> @@ -11579,7 +11848,7 @@ timeout <span class="o">&lt;-</span> <span class=&q Resolving species names when you have a lot of them - 2013-01-25T00:00:00-08:00 + 2013-01-25T00:00:00+01:00 http://recology.info//2013/01/tnrs-use-case <h3><strong>taxize use case: Resolving species names when you have a lot of them</strong></h3> @@ -11786,7 +12055,7 @@ outdf<span class="p">[</span>outdf<span class="o&q Open Science Challenge - 2013-01-08T00:00:00-08:00 + 2013-01-08T00:00:00+01:00 http://recology.info//2013/01/open-science-challenge <p><img src="https://raw.github.com/sckott/sckott.github.com/master/public/img/ropensci_challenge.png" alt="center"></p> @@ -11852,7 +12121,7 @@ outdf<span class="p">[</span>outdf<span class="o&q Is invasive? - 2012-12-13T00:00:00-08:00 + 2012-12-13T00:00:00+01:00 http://recology.info//2012/12/is-invasive <p>The Global Invasive Species Database (GISD) (see their website for more info <a href="http://www.issg.org/database/welcome/">here</a>) has data on the invasiveness status of many species. From <code>taxize</code> you can now query the GISD database. 
</p> @@ -11932,7 +12201,7 @@ Done Shiny apps are awesome - 2012-12-10T00:00:00-08:00 + 2012-12-10T00:00:00+01:00 http://recology.info//2012/12/shiny-r <p>RStudio has a new product called <code>Shiny</code> that, quoting from their website, &quot;makes it super simple for R users like you to turn analyses into interactive web applications that anyone can use&quot;. <a href="http://www.rstudio.com/shiny/">See here</a> for more information. </p> @@ -12017,7 +12286,7 @@ shinyUI<span class="p">(</span>pageWithSidebar<span cla One R package for all your taxonomic needs - 2012-12-06T00:00:00-08:00 + 2012-12-06T00:00:00+01:00 http://recology.info//2012/12/taxize <p>UPDATE: there were some errors in the tests for <code>taxize</code>, so the binaries aren&#39;t avaiable yet. You can install from source though, see below. </p> @@ -12401,7 +12670,7 @@ out<span class="p">[</span><span class="kp"&g Altecology, a call to unconference action - 2012-11-15T00:00:00-08:00 + 2012-11-15T00:00:00+01:00 http://recology.info//2012/11/altecology <p>Note: This post is cross-posted on Sandra Chung&#39;s blog <a href="http://sandrachung.com/">here</a>.</p> @@ -12480,7 +12749,7 @@ out<span class="p">[</span><span class="kp"&g Displaying Your Data in Google Earth Using R2G2 - 2012-10-24T00:00:00-07:00 + 2012-10-24T00:00:00+02:00 http://recology.info//2012/10/R2G2-package <p>Have you ever wanted to easily visualize your ecology data in <a href="http://earth.google.com">Google Earth</a>? <a href="http://cran.r-project.org/web/packages/R2G2/index.html">R2G2</a> is a new package for R, available via <a href="http://cran.r-project.org/">R CRAN</a> and formally described in <a href="http://onlinelibrary.wiley.com/doi/10.1111/1755-0998.12012/abstract">this Molecular Ecology Resources article</a>, which provides a user-friendly bridge between R and the Google Earth interface. Here, we will provide a brief introduction to the package, including a short tutorial, and then encourage you to try it out with your own data!</p> @@ -12564,7 +12833,7 @@ out<span class="p">[</span><span class="kp"&g Getting taxonomic names downstream - 2012-10-16T00:00:00-07:00 + 2012-10-16T00:00:00+02:00 http://recology.info//2012/10/get-taxa-downstream <p>It can be a pain in the ass to get taxonomic names. For example, I sometimes need to get all the Class names for a set of species. This is a relatively easy problem using the <a href="http://www.itis.gov/ws_description.html">ITIS API</a> (example below). </p> @@ -12655,7 +12924,7 @@ out<span class="p">[</span><span class="kp"&g Exploring phylogenetic tree balance metrics - 2012-10-10T00:00:00-07:00 + 2012-10-10T00:00:00+02:00 http://recology.info//2012/10/phylogenetic-tree-balance <p>I need to simulate balanced and unbalanced phylogenetic trees for some research I am doing. In order to do this, I do rejection sampling: simulate a tree -&gt; measure tree shape -&gt; reject if not balanced or unbalanced <strong>enough</strong>. But what is enough? We need to define some cutoff value to determine what will be our set of balanced and unbalanced trees. </p> @@ -12791,7 +13060,7 @@ out<span class="p">[</span><span class="kp"&g GBIF biodiversity data from R - more functions - 2012-10-08T00:00:00-07:00 + 2012-10-08T00:00:00+02:00 http://recology.info//2012/10/rgbif-newfxns <h4>UPDATE: In response to Jarrett&#39;s query I laid out a separate use case in which you may want to query by higher taxonomic rankings than species. See below. 
In addition, added examples of querying by location in reply to comments by seminym.</h4> @@ -12977,7 +13246,7 @@ out<span class="p">[</span><span class="kp"&g Vertnet - getting vertebrate museum record data and a quick map - 2012-09-19T00:00:00-07:00 + 2012-09-19T00:00:00+02:00 http://recology.info//2012/09/rvertnet <p>We (<a href="http://ropensci.org/">rOpenSci</a>) started a repo to wrap the API for <a href="http://vertnet.org/index.php">VertNet</a>, an open access online database of vertebrate specimen records across many collection holders. Find the open source code <a href="https://github.com/ropensci/rvertnet">here</a> - please contribute if you are so inclined. We had a great Google Summer of Code student, <a href="http://vijaybarve.wordpress.com/">Vijay Barve</a> contributing to the repo this summer, so it is getting close to being CRAN-able. </p> @@ -13015,7 +13284,7 @@ out<span class="p">[</span><span class="kp"&g Getting data from figures in published papers - 2012-09-18T00:00:00-07:00 + 2012-09-18T00:00:00+02:00 http://recology.info//2012/09/getting-data <h2>The problem:</h2> @@ -13112,7 +13381,7 @@ out<span class="p">[</span><span class="kp"&g Scholarly metadata from R - 2012-09-17T00:00:00-07:00 + 2012-09-17T00:00:00+02:00 http://recology.info//2012/09/rmetadata <p>Metadata! Metadata is very cool. It&#39;s super hot right now - everybody is talking about it. Okay, maybe not everyone, but it&#39;s an important part of archiving scholarly work.</p> @@ -13397,7 +13666,7 @@ earliestDatestamp deletedRecord granularity Getting data on your government - 2012-09-01T00:00:00-07:00 + 2012-09-01T00:00:00+02:00 http://recology.info//2012/09/gov-dat <hr> @@ -13569,7 +13838,7 @@ $first_name Getting ecology and evolution journal titles from R - 2012-08-31T00:00:00-07:00 + 2012-08-31T00:00:00+02:00 http://recology.info//2012/08/get-ecoevo-journal-titles <hr> @@ -13673,7 +13942,7 @@ $first_name Ecology unconference at ESA 2013 - 2012-08-30T09:25:00-07:00 + 2012-08-30T18:25:00+02:00 http://recology.info//2012/08/ecology-unconference <hr> @@ -13706,7 +13975,7 @@ $first_name Making matrices with zeros and ones - 2012-08-30T09:02:00-07:00 + 2012-08-30T18:02:00+02:00 http://recology.info//2012/08/making-matrices <hr> @@ -13840,7 +14109,7 @@ $first_name ggplot2 maps with insets - 2012-08-22T00:00:00-07:00 + 2012-08-22T00:00:00+02:00 http://recology.info//2012/08/ggplot-inset-map <blockquote> <p>UPDATE: changed data source so that the entire example can be run by anyone on their own machine. Also, per Joachim&#39;s suggestion, I put a box around the blown up area of the map. In addition, rgeos and maptools removed, not needed.</p> @@ -13940,7 +14209,7 @@ vpa_ <span class="o">&lt;-</span> viewport<span cla Hitting the Global Names Resolver API - 2012-07-20T00:00:00-07:00 + 2012-07-20T00:00:00+02:00 http://recology.info//2012/07/global-names-resolver <h2>Example of using the Global Names Resolver API to check species names</h2> @@ -14046,7 +14315,7 @@ vpa_ <span class="o">&lt;-</span> viewport<span cla Recent R packages for ecology and evolution - 2012-06-14T00:00:00-07:00 + 2012-06-14T00:00:00+02:00 http://recology.info//2012/06/recent-r-eeb-packages <p>Many R packages/tools have come out recently for doing ecology and evolution. All of the below were described in Methods in Ecology and Evolution, except for spider, which came out in <a href="http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1755-0998">Molecular Ecology Resources</a>. 
Here are some highlights.</p> @@ -14123,7 +14392,7 @@ vpa_ <span class="o">&lt;-</span> viewport<span cla Visualize your Github stats (forks and watchers) in a browser with R! - 2012-05-05T00:00:00-07:00 + 2012-05-05T00:00:00+02:00 http://recology.info//2012/05/opencpu-github-stats <p>So <a href="http://opencpu.org/">OpenCPU</a> is pretty awesome. You can run R in a browser using URL calls with an alphanumeric code (e.g., x3e50ee0780) defining a stored function, and any arguments you pass to it. </p> @@ -14154,7 +14423,7 @@ vpa_ <span class="o">&lt;-</span> viewport<span cla mvabund - new R pkg for multivariate abundance data - 2012-03-19T00:00:00-07:00 + 2012-03-19T00:00:00+01:00 http://recology.info//2012/03/mvabund <p>There is a new R package in town, mvabund, which does, as they say &quot;statistical methods for analysing multivariate abundance data&quot;. The authors introduced the paper in an online early paper in Methods in Ecology and Evolution <a href="http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2012.00190.x/full">here</a>, R package <a href="http://cran.r-project.org/web/packages/mvabund/index.html">here</a>. </p> @@ -14173,7 +14442,7 @@ vpa_ <span class="o">&lt;-</span> viewport<span cla Journal Articles Need Interactive Graphics - 2012-02-25T00:00:00-08:00 + 2012-02-25T00:00:00+01:00 http://recology.info//2012/02/science-publications-need-interactive-graphics <p>I should have thought of it earlier: In a day and age when we are increasingly reading scientific literature on computer screens, why is it that we limit our peer-reviewed data representation to static, unchanging graphs and plots? Why do we not try to create dynamic visualizations of our rich and varied data sets? Would we not derive benefits in the quality and clarity of scientific discourse from publishing these visualizations?</p> @@ -14196,7 +14465,7 @@ vpa_ <span class="o">&lt;-</span> viewport<span cla Take the INNGE survey on math and ecology - 2012-02-17T00:00:00-08:00 + 2012-02-17T00:00:00+01:00 http://recology.info//2012/02/math-ecology-survey <p>Many ecologists are R users, but we vary in our understanding of the math and statistical theory behind models we use. There is no clear consensus on what should be the basic mathematical training of ecologists. </p> @@ -14209,7 +14478,7 @@ vpa_ <span class="o">&lt;-</span> viewport<span cla Scraping Flora of North America - 2012-01-27T00:00:00-08:00 + 2012-01-27T00:00:00+01:00 http://recology.info//2012/01/flora-north-america-scraping <p>So <a href="http://fna.huh.harvard.edu/">Flora of North America</a> is an awesome collection of taxonomic information for plants across the continent. However, the information within is not easily machine readable. </p> @@ -14240,7 +14509,7 @@ install_github<span class="p">(</span><span class=" RNetLogo - A package for running NetLogo from R - 2012-01-23T00:00:00-08:00 + 2012-01-23T00:00:00+01:00 http://recology.info//2012/01/RNetLogo <p>Described in a new Methods in Ecology and Evolution paper <a href="http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00180.x/abstract">here</a>, a new <a href="http://cran.r-project.org/">R</a> package <a href="http://cran.r-project.org/web/packages/RNetLogo/index.html">RNetLogo</a> allows you to use <a href="http://ccl.northwestern.edu/netlogo/">NetLogo</a> from R. 
</p> @@ -14253,7 +14522,7 @@ install_github<span class="p">(</span><span class=" Taking a Closer Look at Peer Review - 2012-01-16T00:00:00-08:00 + 2012-01-16T00:00:00+01:00 http://recology.info//2012/01/reviewing-peer-review-process <p>This post is only tangentially about open science. It is more directly about the process of peer review and how it might be improved. I am working on a follow-up post about how these points can be addressed in an open publishing environment.</p> @@ -14290,7 +14559,7 @@ install_github<span class="p">(</span><span class=" Function for phylogeny resolution - 2012-01-13T00:00:00-08:00 + 2012-01-13T00:00:00+01:00 http://recology.info//2012/01/phylogeny-resolution <p>UPDATE: Yeah, so the treeresstats function had a problem in one of the calculations. I fixed that and added some more calulcations to the function. </p> @@ -14357,7 +14626,7 @@ install_github<span class="p">(</span><span class=" Moving from blogger and wordpress to jekyll - 2012-01-11T00:00:00-08:00 + 2012-01-11T00:00:00+01:00 http://recology.info//2012/01/moving-from-blogger-wordpress-to-jekyll <p>Recology used to be hosted on Blogger, and my personal website was hosted on Wordpress. Neither platform was very satisfying. Blogger is very limited in their layouts, unless you use dynamic views, which suck because they don&#39;t allow javascript snippets to render GitHub gists. Wordpress is just limited all around as you can&#39;t put in hardly anythig excep text and some pictures. They both have their place, but not so much for content that requires syntax highlighting, references, etc. </p> @@ -14389,7 +14658,7 @@ install_github<span class="p">(</span><span class=" Presenting results of logistic regression - 2012-01-10T05:50:00-08:00 + 2012-01-10T14:50:00+01:00 http://recology.info//2012/01/logistic-regression-barplot-fig <p>So my advisor pointed out this &#39;new&#39; (well, 2004), way of plotting results of logistic regression results. The idea was presented in a 2004 Bulletin of the Ecological Society of America issue (<a href="http://esapubs.org/bulletin/backissues/085-3/bulletinjuly2004_2column.htm#tools1">here</a>). I tried to come up with a solution using, what else, ggplot2. I don&#39;t have it quite all the way down - I am missing the second y-axis values for the histograms, but someone smarter than me can figure that part out (note that Hadley doesn&#39;t want to support second y-axes in ggplot2, but they can probably be hacked on). </p> @@ -14421,7 +14690,7 @@ install_github<span class="p">(</span><span class=" Testing twitterfeed - 2012-01-08T09:33:00-08:00 + 2012-01-08T18:33:00+01:00 http://recology.info//2012/01/testing-twitterfeed <p>Does this work on twitterfeed?</p> @@ -14430,7 +14699,7 @@ install_github<span class="p">(</span><span class=" Weecology can has new mammal dataset - 2011-12-29T06:11:00-08:00 + 2011-12-29T15:11:00+01:00 http://recology.info//2011/12/weecology-can-has-new-mammal-dataset <p>So the <a href="http://weecology.org/">Weecology</a> folks have published a large dataset on mammal communities in a data paper in <a href="http://www.esajournals.org/doi/abs/10.1890/11-0262.1">Ecology</a>. I know nothing about mammal communities, but that doesn&#39;t mean one can&#39;t play with the data...</p> @@ -14469,7 +14738,7 @@ install_github<span class="p">(</span><span class=" Recology is 1 yr old - 2011-12-23T16:52:00-08:00 + 2011-12-24T01:52:00+01:00 http://recology.info//2011/12/recology-is-1-yr-old <p>This blog has lasted a whole year already. 
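[Editor's note: for the logistic-regression figure discussed above, a bare-bones ggplot2 version of the fitted-probability part (without the marginal histograms the post describes, and with simulated stand-in data rather than the original example) might look like this.]

```r
library(ggplot2)

set.seed(1)
dat <- data.frame(x = runif(200, 0, 10))
dat$y <- rbinom(200, 1, plogis(-3 + 0.8 * dat$x))   # fake binary response

fit  <- glm(y ~ x, data = dat, family = binomial)    # logistic regression
pred <- data.frame(x = seq(0, 10, length.out = 100))
pred$p <- predict(fit, newdata = pred, type = "response")

ggplot(dat, aes(x, y)) +
  geom_point(alpha = 0.3) +
  geom_line(data = pred, aes(x, p)) +                # fitted probability curve
  labs(y = "Probability of success")
```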
&nbsp;Thanks for reading and commenting. </p> @@ -14515,7 +14784,7 @@ install_github<span class="p">(</span><span class=" Dynamic views don't support javascript-so reverting back to simple views - 2011-12-22T09:36:00-08:00 + 2011-12-22T18:36:00+01:00 http://recology.info//2011/12/dynamic-views-don-t-support-javascript-so-reverting-back-to-simple-views <p>Sorry for the temporary loss of GitHub gists...Hopefully dynamic views will support javascript soon!!</p> @@ -14524,7 +14793,7 @@ install_github<span class="p">(</span><span class=" I Work For The Internet ! - 2011-12-13T07:35:00-08:00 + 2011-12-13T16:35:00+01:00 http://recology.info//2011/12/i-work-for-internet <p>UPDATE: code and figure updated at 647 AM CST on 19 Dec &#39;11. Also, see Jarrett Byrnes (improved) fork of my gist <a href="https://gist.github.com/1474802">here</a>.</p> @@ -14545,7 +14814,7 @@ install_github<span class="p">(</span><span class=" LondonR meetings presentations - 2011-12-11T09:59:00-08:00 + 2011-12-11T18:59:00+01:00 http://recology.info//2011/12/londonr-meetings-presentations <p>Three presentations uploaded on <a href="http://www.londonr.org/Presentations/Agenda.html" target="_blank">LondonR meetings website</a>. &nbsp;I especially enjoyed the <a href="http://www.londonr.org/Presentations/segue-presentation-LondonRUG%20(1).ppt" target="_blank">JD Long presentation</a> on the seque package for simulations using Amazon&#39;s EC2.</p> @@ -14554,7 +14823,7 @@ install_github<span class="p">(</span><span class=" rOpenSci won 3rd place in the PLoS-Mendeley Binary Battle! - 2011-11-30T10:28:00-08:00 + 2011-11-30T19:28:00+01:00 http://recology.info//2011/11/ropensci-won-3rd-place-in-the-plos-mendeley-binary-battle <p><br /><div style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;">I am part of the rOpenSci development team (<a href="http://ropensci.org/developers/" target="_blank">along with Carl Boettiger, Karthik Ram, and Nick Fabina</a>). &nbsp; Our website: <a href="http://ropensci.org/">http://ropensci.org/</a>. &nbsp;Code at Github:&nbsp;<a href="https://github.com/ropensci">https://github.com/ropensci</a></div><div style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;"><br /></div><div style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;">We entered two of our R packages for integrating with PLoS Journals (rplos) and Mendeley (RMendeley) in the&nbsp;<a data-mce-href="http://dev.mendeley.com/api-binary-battle" href="http://dev.mendeley.com/api-binary-battle" target="_blank">Mendeley-PLoS Binary Battle</a>. &nbsp;Get them at GitHub (<a data-mce-href="https://github.com/ropensci/rplos" href="https://github.com/ropensci/rplos" target="_blank">rplos</a>;&nbsp;<a data-mce-href="https://github.com/ropensci/RMendeley" href="https://github.com/ropensci/RMendeley" target="_blank">RMendeley</a>).</div><div style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;"><br /></div><div style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;">These two packages allow users (from R! of course) to search and retrieve data from PLoS journals (including their altmetrics data), and from Mendeley. &nbsp;You could surely mash up data from both PLoS and Mendeley. 
&nbsp;That&#39;s what&#39;s cool about rOpenSci - we provide the tools, and leave it up to users vast creativity to do awesome things.</div><div style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;"><br /></div><div style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;">3rd place gives us a $1,000 prize, plus a&nbsp;<a data-mce-href="http://ardrone.parrot.com/parrot-ar-drone/uk/" href="http://ardrone.parrot.com/parrot-ar-drone/uk/" target="_blank">Parrot AR Drone helicopter</a>.</div></p> @@ -14563,7 +14832,7 @@ install_github<span class="p">(</span><span class=" Public vote open for Mendely-PLoS Binary Battle: vote rOpenSci! - 2011-11-19T07:00:00-08:00 + 2011-11-19T16:00:00+01:00 http://recology.info//2011/11/public-vote-open-for-mendely-plos <p><a href="http://www.surveygizmo.com/s3/722753/Mendeley-PLoS-Binary-Battle-Public-Vote">http://www.surveygizmo.com/s3/722753/Mendeley-PLoS-Binary-Battle-Public-Vote</a></p> @@ -14572,7 +14841,7 @@ install_github<span class="p">(</span><span class=" My talk on doing phylogenetics in R - 2011-11-18T11:15:00-08:00 + 2011-11-18T20:15:00+01:00 http://recology.info//2011/11/my-talk-on-doing-phylogenetics-in-r <p>I gave a talk today on doing very basic phylogenetics in R, including getting sequence data, aligning sequence data, plotting trees, doing trait evolution stuff, etc.<br /><br />Please comment if you have code for doing bayesian phylogenetic inference in R. &nbsp;I know phyloch has function mrbayes, but can&#39;t get it to work...<br /><br /><br /><div id="__ss_10222772" style="width: 425px;"><strong style="display: block; margin: 12px 0 4px;"><a href="http://www.slideshare.net/schamber/phylogenetics-in-r" target="_blank" title="Phylogenetics in R">Phylogenetics in R</a></strong> <iframe frameborder="0" height="355" marginheight="0" marginwidth="0" scrolling="no" src="http://www.slideshare.net/slideshow/embed_code/10222772" width="425"></iframe> <br /><div style="padding: 5px 0 12px;">View more <a href="http://www.slideshare.net/" target="_blank">presentations</a> from <a href="http://www.slideshare.net/schamber" target="_blank">schamber</a> </div></div></p> @@ -14581,7 +14850,7 @@ install_github<span class="p">(</span><span class=" Check out a video of my research at RocketHub - 2011-11-01T07:36:00-07:00 + 2011-11-01T15:36:00+01:00 http://recology.info//2011/11/check-out-video-of-my-research-at <p>Okay, so this post isn&#39;t at all about R - but I can&#39;t resist begging my readers for some help. <br /><br />I’m trying to get some crowdfunding for my research on the evolution of native plants in agricultural landscapes. My campaign is part of a larger project by about 50 other scientists and me to see how well it works to go straight to the public to get funding for science research. All these projects, including mine, are hosted at a site called RocketHub - a site that hosts crowdfunding projects of all sorts – and now they have science.<br /><br />It is important to get a few bucks at the beginning so that the people that don’t know me with deep pockets will hopefully chip in once they see the money ball rolling. 
<br /><br />The funding will go towards paying some students to collect data in the lab for me.<br /><br />Here’s the link if you want to donate, or just to check out the video I made about my research!<br /><a href="http://www.rockethub.com/projects/3790-evolution-in-agriculture">http://www.rockethub.com/projects/3790-evolution-in-agriculture</a><br /><br /><br /><br />And watch the video here too:<br /><br /><iframe allowfullscreen="" frameborder="0" height="315" src="http://www.youtube.com/embed/W1mQgK6xFn0" width="560"></iframe></p> @@ -14590,7 +14859,7 @@ install_github<span class="p">(</span><span class=" My little presentation on getting web data through R - 2011-10-28T09:16:00-07:00 + 2011-10-28T18:16:00+02:00 http://recology.info//2011/10/my-little-presentation-on-getting-web <div id="__ss_9926321" style="width: 425px;"><span style="display: block; margin: 12px 0 4px;">With examples from <a href="http://ropensci.org/">rOpenSci</a> R packages.&nbsp;</span><span style="display: block; margin: 12px 0 4px;">p.s. I am no expert at this...</span><strong style="display: block; margin: 12px 0 4px;"><br /></strong><strong style="display: block; margin: 12px 0 4px;"><a href="http://www.slideshare.net/schamber/web-data-from-r" target="_blank" title="Web data from R">Web data from R</a></strong> <iframe frameborder="0" height="355" marginheight="0" marginwidth="0" scrolling="no" src="http://www.slideshare.net/slideshow/embed_code/9926321" width="425"></iframe> <br /><div style="padding: 5px 0 12px;">View more <a href="http://www.slideshare.net/" target="_blank">presentations</a> from <a href="http://www.slideshare.net/schamber" target="_blank">schamber</a> </div></div> @@ -14599,7 +14868,7 @@ install_github<span class="p">(</span><span class=" Two new rOpenSci R packages are on CRAN - 2011-10-27T05:27:00-07:00 + 2011-10-27T14:27:00+02:00 http://recology.info//2011/10/two-new-ropensci-r-packages-are-on-cran <p><a href="http://www.carlboettiger.info/">Carl Boettiger</a>, a graduate student at UC Davis, just got two packages on <a href="http://cran.r-project.org/web/packages/available_packages_by_name.html">CRAN</a>. &nbsp;One is <a href="http://cran.r-project.org/web/packages/treebase/index.html">treebase</a>, which which handshakes with the <a href="http://www.treebase.org/treebase-web/home.html">Treebase</a> API. &nbsp;The other is <a href="http://cran.r-project.org/web/packages/rfishbase/index.html">rfishbase</a>, which connects with the <a href="http://www.fishbase.org/search.php">Fishbase</a>, although I believe just scrapes XML content as there is no API. &nbsp;See development on GitHub for treebase <a href="https://github.com/ropensci/treeBASE">here</a>, and for rfishbase <a href="https://github.com/ropensci/rfishbase">here</a>. &nbsp;Carl has some tutorials on treebase and rfishbase at his website <a href="http://www.carlboettiger.info/">here</a>, and we have an official rOpenSci tutorial for treebase <a href="http://ropensci.org/tutorials/r-treebase-tutorial/">here</a>.<br /><br />Basically, these two R packages let you search and pull down data from Treebase and Fishbase - pretty awesome. 
&nbsp;This improves workflow, and puts your data search and acquisition component into your code, instead of being a bunch of mouse clicks in a browser.<br /><br />These two packages are part of the <a href="http://ropensci.org/">rOpenSci project</a>.</p> @@ -14608,7 +14877,7 @@ install_github<span class="p">(</span><span class=" Two-sex demographic models in R - 2011-10-26T07:31:00-07:00 + 2011-10-26T16:31:00+02:00 http://recology.info//2011/10/two-sex-demographic-models-in-r <p>Tom Miller (a prof here at Rice) and Brian Inouye have a paper out in Ecology (<a href="http://www.esajournals.org/doi/abs/10.1890/11-0028.1">paper</a>, <a href="http://www.esapubs.org/archive/archive_E.htm">appendices</a>) that confronts two-sex models of dispersal with empirical data.<br /><br />They conducted the first confrontation of two-sex demographic models with empirical data on lab populations of bean beetles <i>Callosobruchus</i>. <br /><br />Their R code for the modeling work is available at Ecological Archives (link <a href="http://www.esapubs.org/archive/ecol/E092/186/">here</a>).<br /><br /><br />Here is a figure made from running the five blocks of code in &#39;Miller<em>and</em>Inouye_figures.txt&#39; that reproduces Fig. 4 (A-E) in their Ecology paper (p = proportion female, Nt = density). &nbsp;Nice!<br />A: Saturating density dependence <br />B: Over-compensatory density dependence<br />C: Sex-specific gamma&#39;s (but bM=bF=0.5)<br />D:&nbsp;<span style="background-color: transparent;">Sex-specific b&#39;s (but gammaM=gammaF=1)</span><br />E:&nbsp;<span style="background-color: transparent;">Sex-specific b&#39;s (but gammaM=gammaF=2)</span><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/-Ht7fPEjDhQY/TqgYoiQQlPI/AAAAAAAAFEU/ehhPrxOseK4/s1600/Screen+Shot+2011-10-26+at+9.26.11+AM.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="391" src="http://2.bp.blogspot.com/-Ht7fPEjDhQY/TqgYoiQQlPI/AAAAAAAAFEU/ehhPrxOseK4/s400/Screen+Shot+2011-10-26+at+9.26.11+AM.png" width="400" /></a></div><br /></p> @@ -14617,7 +14886,7 @@ install_github<span class="p">(</span><span class=" New food web dataset - 2011-10-14T11:00:00-07:00 + 2011-10-14T20:00:00+02:00 http://recology.info//2011/10/new-food-web-dataset <p><br /><br />So, there is a new food web dataset out that was put in Ecological Archives <a href="http://www.esapubs.org/Archive/ecol/E092/173/default.htm">here</a>, and I thought I would play with it. The food web is from Otago Harbour, an intertidal mudflat ecosystem in New Zealand. The web contains 180 nodes, with 1,924 links. 
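[Editor's note: a hedged sketch of how a web of that size (180 nodes, 1,924 links) could be drawn with igraph, using a random stand-in graph rather than the actual Otago Harbour edge list; the post's own plots come from its linked gist.]

```r
library(igraph)

set.seed(1)
g <- sample_gnm(n = 180, m = 1924, directed = TRUE)  # random stand-in, same size as the web

plot(g, layout = layout_in_circle(g),
     vertex.size = 2, vertex.label = NA, edge.arrow.size = 0.1)

# with the real data you would build the graph from a two-column edge list, e.g.
# g <- graph_from_data_frame(edges, directed = TRUE)
```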
<br /><br />Fun stuff...<br /><br /><div class="separator" style="clear: both; text-align: center;">igraph, default layout plot</div><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/-2lQOoeAqGCM/Tphf9GJI8LI/AAAAAAAAFEA/EPwum7GfwXg/s1600/igraphplot.jpeg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="303" src="http://3.bp.blogspot.com/-2lQOoeAqGCM/Tphf9GJI8LI/AAAAAAAAFEA/EPwum7GfwXg/s400/igraphplot.jpeg" width="400" /></a></div><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: center;">igraph, circle layout plot, nice</div><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/--hGl2IwHi4M/TphhJYdBO0I/AAAAAAAAFEI/8GsLuUkbYcM/s1600/igraphcircleplot.jpeg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="303" src="http://1.bp.blogspot.com/--hGl2IwHi4M/TphhJYdBO0I/AAAAAAAAFEI/8GsLuUkbYcM/s400/igraphcircleplot.jpeg" width="400" /></a></div><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: center;">My funky little gggraph function plot</div><div class="separator" style="clear: both; text-align: center;">get the gggraph function, and make it better, <a href="https://github.com/sckott/gggraph">here at Github</a></div><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/-MBPHlFaVWos/Tphf82gWUpI/AAAAAAAAFD4/qaxCX4PP-C0/s1600/gggraphplot.jpeg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="303" src="http://4.bp.blogspot.com/-MBPHlFaVWos/Tphf82gWUpI/AAAAAAAAFD4/qaxCX4PP-C0/s400/gggraphplot.jpeg" width="400" /></a></div><div class="separator" style="clear: both; text-align: center;"><br /></div><div class="separator" style="clear: both; text-align: center;"><br /></div><br /><br /><script src="https://gist.github.com/1287545.js?file=newfoodwb.R"></script></p> @@ -14626,7 +14895,7 @@ install_github<span class="p">(</span><span class=" Phylogenetic community structure: PGLMMs - 2011-10-13T08:18:00-07:00 + 2011-10-13T17:18:00+02:00 http://recology.info//2011/10/phylogenetic-community-structure-pglmms <p>So, <a href="http://r-ecology.blogspot.com/2011/01/new-approach-to-analysis-of.html">I&#39;ve blogged about this topic before</a>, way back on 5 Jan this year.<br /><br />Matt Helmus, a postdoc in the <a href="http://woottonlab.uchicago.edu/">Wootton lab at the University of Chicago</a>, published a paper with Anthony Ives in Ecological Monographs this year (<a href="http://www.esajournals.org/doi/abs/10.1890/10-1264.1">abstract here</a>). &nbsp;The paper addressed a new statistical approach to phylogenetic community structure. <br /><br />As I said in the original post, part of the power of the PGLMM (phylogenetic generalized linear mixed models) approach is that you don&#39;t have to conduct quite so many separate statistical tests as with the previous null model/randomization approach. <br /><br />Their original code was written in Matlab. &nbsp;Here I provide the R code that Matt has so graciously shared with me. &nbsp;There are four functions and a fifth file has an example use case. &nbsp;The example and output are shown below. 
<br /><br />Look for the inclusion of Matt&#39;s PGLMM to the picante R package in the future.<br /><br /><u>Here are links to the files as GitHub gists:&nbsp;</u><br />PGLMM.data.R: &nbsp;<a href="https://gist.github.com/1278205">https://gist.github.com/1278205</a><br />PGLMM.fit.R: &nbsp;<a href="https://gist.github.com/1284284">https://gist.github.com/1284284</a><br />PGLMM.reml.R: &nbsp;<a href="https://gist.github.com/1284287">https://gist.github.com/1284287</a><br />PGLMM.sim.R: &nbsp;<a href="https://gist.github.com/1284288">https://gist.github.com/1284288</a><br />PGLMM<em>example.R: &nbsp;<a href="https://gist.github.com/1284442">https://gist.github.com/1284442</a><br /><br />Enjoy!<br /><br /><br /><b><u>The example</u></b><br />&lt;script src=&quot;https://gist.github.com/1284477.js?file=PGLMM</em>exampleoutput.R&quot;&gt;</script><br /><br /><br />..and the figures...<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/-ODHXaozYSFY/Tpb9qSXbbHI/AAAAAAAAFDg/hLHlGDiYRSw/s1600/plot1.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="315" src="http://3.bp.blogspot.com/-ODHXaozYSFY/Tpb9qSXbbHI/AAAAAAAAFDg/hLHlGDiYRSw/s320/plot1.png" width="320" /></a></div><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/-tQYXCZWIMYs/Tpb9q5zF4EI/AAAAAAAAFDo/_iOxMYf5DsI/s1600/plot2.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="307" src="http://2.bp.blogspot.com/-tQYXCZWIMYs/Tpb9q5zF4EI/AAAAAAAAFDo/_iOxMYf5DsI/s320/plot2.png" width="320" /></a></div><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/-fowoTDI0chc/Tpb9rMAlswI/AAAAAAAAFDw/7pvqZ-jpECk/s1600/plot3.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="http://1.bp.blogspot.com/-fowoTDI0chc/Tpb9rMAlswI/AAAAAAAAFDw/7pvqZ-jpECk/s320/plot3.png" width="316" /></a></div><br /></p> @@ -14635,7 +14904,7 @@ install_github<span class="p">(</span><span class=" R talk on regular expressions (regex) - 2011-10-06T08:50:00-07:00 + 2011-10-06T17:50:00+02:00 http://recology.info//2011/10/r-talk-on-regular-expressions-regex <p>Regular expressions are a powerful in any language to manipulate, search, etc. data.</p> @@ -14665,7 +14934,7 @@ install_github<span class="p">(</span><span class=" R tutorial on visualizations/graphics - 2011-09-30T08:48:00-07:00 + 2011-09-30T17:48:00+02:00 http://recology.info//2011/09/r-tutorial-on-visualizationsgraphics <p>Rolf Lohaus, a Huxley postdoctoral fellow here in the EEB dept at Rice University, gave our R course a talk on basic visualizations in R this morning.<br /><br />Enjoy!<br /><br /><br /><script src="https://gist.github.com/1254174.js?file=visualizations_tutorial.R"></script></p> @@ -14674,7 +14943,7 @@ install_github<span class="p">(</span><span class=" Short on funding? Can't get a grant? Crowdfunding! #SciFund - 2011-09-27T09:16:00-07:00 + 2011-09-27T18:16:00+02:00 http://recology.info//2011/09/short-on-funding-cant-get-grant <p>Crowdsourced funding is becoming a sustainable way for various artists, entrepreneurs, etc. to get their idea funded from individuals. 
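[Editor's note: as a small companion to the regex talk mentioned above, a few base-R regular-expression calls on hypothetical species-name strings; nothing here is from the talk itself.]

```r
x <- c("Abies_concolor 12.3", "Pinus ponderosa 7", "quercus-rubra 0.51")

grepl("^[A-Z]", x)                                   # which entries start with a capital letter?
sub("[ _-]", " ", x)                                 # standardise the genus/species separator
as.numeric(regmatches(x, regexpr("[0-9.]+$", x)))    # pull out the trailing numeric value
```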
For example, think of <a href="http://www.kickstarter.com/">Kickstarter</a> and <a href="http://www.rockethub.com/">RocketHub</a>.<br /><br /><br />Jai Ranganathan and Jarrett Byrnes have started an experiment to determine how well crowdfunding can work for scientists: The SciFund Challenge. Go here to <a href="http://scifund.wordpress.com/sign-up/">signup</a> and here for their <a href="http://scifund.wordpress.com/">website</a>. <br /><br /><br />The deadline to sign up is Oct. 1</p> @@ -14683,7 +14952,7 @@ install_github<span class="p">(</span><span class=" Ten Simple Rules for OA Publishers talk by Philip Bourne - 2011-09-23T12:44:00-07:00 + 2011-09-23T21:44:00+02:00 http://recology.info//2011/09/ten-simple-rules-for-oa-publishers-talk <div id="__ss_9354451" style="width: 425px;"><strong style="display: block; margin: 12px 0 4px;"><a href="http://www.slideshare.net/pebourne/ten-simple-rules-for-open-access-publishers" target="_blank" title="Ten Simple Rules for Open Access Publishers">Ten Simple Rules for Open Access Publishers</a></strong> <iframe frameborder="0" height="355" marginheight="0" marginwidth="0" scrolling="no" src="http://www.slideshare.net/slideshow/embed_code/9354451" width="425"></iframe> <br /><br /><div style="padding: 5px 0 12px;">View more <a href="http://www.slideshare.net/" target="_blank">presentations</a> from <a href="http://www.slideshare.net/pebourne" target="_blank">Philip Bourne</a> </div></div> @@ -14692,7 +14961,7 @@ install_github<span class="p">(</span><span class=" @drewconway interview on @DataNoBorders at the Strata conference - 2011-09-22T21:10:00-07:00 + 2011-09-23T06:10:00+02:00 http://recology.info//2011/09/drewconway-interview-on-datanoborders <p>The O&#39;Reilly Media Strata Summit has many interviews on YouTube (just search YouTube for it)<br /><br />Drew Conway is the author of a R packages, including <a href="http://cran.r-project.org/web/packages/infochimps/index.html">infochimps</a>, an R wrapper to the <a href="http://www.infochimps.com/">Infochimps</a> API service.<br /><br />The YouTube video:<iframe allowfullscreen="" frameborder="0" height="315" src="http://www.youtube.com/embed/fsnnwTWoOLk" width="560"></iframe><br /><br /><br /></p> @@ -14701,7 +14970,7 @@ install_github<span class="p">(</span><span class=" Open science talk by Carl Boettiger - 2011-09-22T20:55:00-07:00 + 2011-09-23T05:55:00+02:00 http://recology.info//2011/09/open-science-talk-by-carl-boettiger <p>Carl Boettiger gave a talk on the topic of open science to incoming UC Davis graduate students.</p> @@ -14714,7 +14983,7 @@ install_github<span class="p">(</span><span class=" My take on an R introduction talk - 2011-09-09T11:03:00-07:00 + 2011-09-09T20:03:00+02:00 http://recology.info//2011/09/my-take-on-r-introduction-talk <p>UPDATE: I put in an R tutorial as a Github gist below.<br /><br /><br />Here is a short intro R talk I gave today...for what it&#39;s worth...<br /><br /><br /><br /><div id="__ss_9195930" style="width: 425px;"><strong style="display: block; margin: 12px 0 4px;"><a href="http://www.slideshare.net/schamber/r-introduction" target="_blank" title="R Introduction">R Introduction</a></strong> <iframe frameborder="0" height="355" marginheight="0" marginwidth="0" scrolling="no" src="http://www.slideshare.net/slideshow/embed_code/9195930" width="425"></iframe> <br /><div style="padding: 5px 0 12px;">View more <a href="http://www.slideshare.net/" target="_blank">presentations</a> from <a href="http://www.slideshare.net/schamber" target="_blank">schamber</a> 
<br /><br /><br /><br /></div></div><script src="https://gist.github.com/1208321.js?file=Rintrotutorial.R"></script></p> @@ -14723,7 +14992,7 @@ install_github<span class="p">(</span><span class=" A Data Visualization Book - 2011-09-08T23:28:00-07:00 + 2011-09-09T08:28:00+02:00 http://recology.info//2011/09/data-visualization-book <p><i>Note: thanks to Scott for inviting me to contribute to the Recology blog despite being an ecology outsider; my work is primarily in atomic physics. -Pascal</i><p>A part of me has always liked thinking about how to effectively present information, but until the past year, I had not read much to support my (idle) interest in information visualization. That changed in the spring when I read Edward Tufte&#39;s <i>The Visual Display of Quantitative Information</i>, a book that stimulated me to think more deeply about presenting information. I originally started with a specific task in mind--a wonderful tool for focusing one&#39;s interests--but quickly found that Tufte&#39;s book was less a practical guide and more a list of general design principles. Then, a few months ago, I stumbled upon Nathan Yau&#39;s blog, <a href="http://flowingdata.com/">FlowingData</a>, and found out he was writing a practical guide to design and visualization. Conveniently enough for me, Yau&#39;s book, <i><a href="http://book.flowingdata.com/">Visualize This</a></i>, would be released within a month of my discovery of his blog; what follows are my impressions of <i>Visualize This</i>.<p>I have liked <i>Visualize This</i> a lot.  Yau writes with much the same informal tone as on his blog, and the layout is visually pleasing (good thing, too, for a book about visualizing information!).  The first few chapters are pretty basic if you have done much data manipulation before, but it is really nice to have something laid out so concisely.  The examples are good, too, in that he is very explicit about every step: there is no intuiting what that missing step should be.  The author even acknowledges in the introduction that the first part of the book is at an introductory level.<p>Early in the book, Yau discusses where to obtain data. This compilation of sources is potentially a useful reference for someone, like me, who almost always generates his own data in the lab. Unfortunately, Yau does not talk much about preparation of (or best practices for) your own data.  Additionally, from the perspective of a practicing scientist, it would have been nice to hear about how to archive data to make sure it is readable far into the future, but that is probably outside the scope of the book.<p>Yau seems really big into using open source software for getting and analyzing data (e.g. Python, R, etc…), but he is surprisingly attached to the proprietary Adobe Illustrator for turning figures into presentation quality graphics.  He says that he feels like the default options in most analysis programs do not make for very good quality graphics (and he is right), but he does not really acknowledge that you can generate nice output if you go beyond the default settings.  For me, the primary advantage of generating output programmatically is that it is easy to regenerate when you need to change the data or the formatting on the plot.  
Using a graphical user interface, like in Adobe Illustrator, is nice if you are only doing something once (how often does that happen?), but when you have to regenerate the darn figure fifty times to satisfy your advisor, it gets tedious to move things around pixel by pixel.<p>By the time I reached the middle chapters, I started finding many of the details to be repetitive. Part of this repetition stems from the fact that Yau divides these chapters by the type of visualization. For example, &quot;Visualizing Proportions&quot; and &quot;Visualizing Relationships&quot; are two of the chapter titles. While I think these distinctions are important ones for telling the right story about one&#39;s data, creating figures for the different data types often boils down to choosing different functions in R or Python. People with less analysis and presentation experience should find the repetition helpful, but I increasingly skimmed these sections as I went along.  <p>Working through Yau&#39;s examples for steps you do not already know would probably be the most useful way of getting something out of the book.  So, for example, I started trying to use Python to scrape data from a webpage, something I had not previously done.  I followed the book&#39;s example of this data-scraping just fine, but as with most things in programming, you find all sorts of minor hurdles to clear when you try your own thing. In my case, I am re-learning the Python I briefly learned about 10 years ago--partly in anticipation of not having access to Matlab licenses once I vacate the academy--since I have forgotten a lot of the syntax.  A lot of this stuff would be faster if I were working in Matlab which I grew more familiar with in graduate school.<p>Overall, <i>Visualize This</i> is a really nice looking book and will continue to be useful to me as a reference. Yau concludes his book with a refreshing reminder to provide context for the data we present. This advice is particularly relevant when presenting to a wider or lay audience, but it is still important for us, as scientists, to clearly communicate our findings in the literature. Patterns in the data are not often self-evident, and therefore we should think carefully about which visualization tools will best convey the meaning of our results.<p><small>Edited to add <a href="http://book.flowingdata.com/"> a link to <i>Visualize This</i></a> here and in the introductory paragraph.</small></p> @@ -14732,7 +15001,7 @@ install_github<span class="p">(</span><span class=" FigShare Talk - 2011-09-08T21:36:00-07:00 + 2011-09-09T06:36:00+02:00 http://recology.info//2011/09/figshare-talk <p>FigShare - I very much like this idea of a place to put your data online that is NOT published. Dryad is a nice place for datastes linked with published papers, but there isn&#39;t really a place for datasets that perhaps did not make the cut for a published paper, and if known to the scientific community, could potentially help resolve the &quot;file-drawer&quot; effect in meta-analyses. 
(wow, run on sentence)<br /><br />&nbsp;<iframe frameborder="0" height="225" src="http://player.vimeo.com/video/26416313?title=0&amp;byline=0&amp;portrait=0" width="400"></iframe><br /><a href="http://vimeo.com/26416313">&quot;Figshare - Why don&#39;t you publish all your research?&quot; Mark Hahnel Imperial College London</a> from <a href="http://vimeo.com/biogeeks">London Biogeeks</a> on <a href="http://vimeo.com/">Vimeo</a>.</p> @@ -14741,7 +15010,7 @@ install_github<span class="p">(</span><span class=" Jonathan Eisen on the Fourth Domain and Open Science - 2011-09-06T16:53:00-07:00 + 2011-09-07T01:53:00+02:00 http://recology.info//2011/09/jonathan-eisen-on-fourth-domain-and <p><iframe frameborder="0" height="227" src="http://player.vimeo.com/video/28444926?portrait=0" width="400"></iframe><br /><br /><br /><a href="http://vimeo.com/28444926">Stalking the Fourth Domain with Jonathan Eisen, Ph D</a> from <a href="http://vimeo.com/user8340111">mendelspod</a> on <a href="http://vimeo.com/">Vimeo</a>.</p> @@ -14750,7 +15019,7 @@ install_github<span class="p">(</span><span class=" rnpn: An R interface for the National Phenology Network - 2011-08-31T08:26:00-07:00 + 2011-08-31T17:26:00+02:00 http://recology.info//2011/08/rnpn-r-interface-for-national-phenology <p>The team at <a href="http://ropensci.org/">rOpenSci</a> and I have been working on a wrapper for the <a href="http://www.usanpn.org/">USA National Phenology Network</a> API. The following is a demo of some of the current possibilities. We will have more functions down the road. Get the publicly available code, and contribute, at Github <a href="https://github.com/ropensci/rnpn">here</a>. If you try this out look at the <a href="https://github.com/ropensci/rnpn/blob/master/DESCRIPTION">Description file </a>for the required R packages to run rnpn. 
Let us know at Github (<a href="https://github.com/ropensci">here</a>) or at our website &nbsp;<a href="http://ropensci.org/">http://ropensci.org/</a>, or&nbsp;in the comments below, or on twitter (@rOpenSci), what use cases you would like to see with the rnpn package.<br /><br /><b><u>Method and demo of each</u></b>:<br /><i><b>Get observations for species by day</b></i><br /><i><span class="Apple-style-span" style="font-style: normal;">From the documentation: &quot;</span></i><span class="Apple-style-span" style="font-family: Arial; font-size: 13px; white-space: pre-wrap;">This function will return a list of species, containing all the dates which observations were made about the species, and a count of the number of such observations made on that date.</span><i><span class="Apple-style-span" style="font-style: normal;">&quot;</span></i><br /><br /><span class="Apple-style-span" style="font-size: x-small;">#### Note, the data below is truncated for blogging brevity...</span><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">&gt; getobsspbyday<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">2</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> <span style="color: blue;">&#39;2008-01-01&#39;</span><span style="color: #339933;">,</span> <span style="color: blue;">&#39;2011-12-31&#39;</span><span style="color: #666666;">) # Searched for species 1 and 2 from Jan 1, 2008 to Dec 31, 2011</span><br /> <a href="http://inside-r.org/r-doc/base/date"><span style="color: #003399; font-weight: bold;">date</span></a> count species<br /><span style="color: #cc66cc;">1</span> <span style="color: #cc66cc;">2009</span>-03-08 <span style="color: #cc66cc;">2</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">2</span> <span style="color: #cc66cc;">2009</span>-03-<span style="color: #cc66cc;">15</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">3</span> <span style="color: #cc66cc;">2009</span>-03-<span style="color: #cc66cc;">22</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">4</span> <span style="color: #cc66cc;">2009</span>-03-<span style="color: #cc66cc;">24</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">5</span> <span style="color: #cc66cc;">2009</span>-03-<span style="color: #cc66cc;">26</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">6</span> <span style="color: #cc66cc;">2009</span>-04-<span style="color: #cc66cc;">17</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">7</span> <span style="color: #cc66cc;">2009</span>-04-<span style="color: #cc66cc;">24</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">8</span> <span style="color: #cc66cc;">2009</span>-05-<span style="color: #cc66cc;">12</span> <span style="color: #cc66cc;">1</span> species <span style="color: 
#cc66cc;">1</span><br /><span style="color: #cc66cc;">9</span> <span style="color: #cc66cc;">2009</span>-05-<span style="color: #cc66cc;">20</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">10</span> <span style="color: #cc66cc;">2009</span>-<span style="color: #cc66cc;">11</span>-<span style="color: #cc66cc;">24</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">11</span> <span style="color: #cc66cc;">2009</span>-<span style="color: #cc66cc;">12</span>-07 <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">12</span> <span style="color: #cc66cc;">2010</span>-01-<span style="color: #cc66cc;">18</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">13</span> <span style="color: #cc66cc;">2010</span>-01-<span style="color: #cc66cc;">23</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">62</span> <span style="color: #cc66cc;">2011</span>-05-<span style="color: #cc66cc;">29</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">63</span> <span style="color: #cc66cc;">2011</span>-06-<span style="color: #cc66cc;">27</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">64</span> <span style="color: #cc66cc;">2011</span>-06-<span style="color: #cc66cc;">30</span> <span style="color: #cc66cc;">2</span> species <span style="color: #cc66cc;">1</span><br /><span style="color: #cc66cc;">65</span> <span style="color: #cc66cc;">2009</span>-03-<span style="color: #cc66cc;">17</span> <span style="color: #cc66cc;">1</span> species <span style="color: #cc66cc;">2</span><br /><span style="color: #cc66cc;">66</span> <span style="color: #cc66cc;">2009</span>-04-03 <span style="color: #cc66cc;">3</span> species <span style="color: #cc66cc;">2</span><br /><span style="color: #cc66cc;">67</span> <span style="color: #cc66cc;">2009</span>-04-05 <span style="color: #cc66cc;">3</span> species <span style="color: #cc66cc;">2</span><br /><span style="color: #cc66cc;">68</span> <span style="color: #cc66cc;">2009</span>-04-<span style="color: #cc66cc;">10</span> <span style="color: #cc66cc;">3</span> species <span style="color: #cc66cc;">2</span><br /><span style="color: #cc66cc;">69</span> <span style="color: #cc66cc;">2009</span>-04-<span style="color: #cc66cc;">17</span> <span style="color: #cc66cc;">3</span> species <span style="color: #cc66cc;">2</span></pre></div></div><br /><br /><br /><br /><i style="font-style: italic;"><br /></i><br /><i style="font-style: italic;"><b>Get individuals at specific stations</b></i><br />From the documentation: &quot;<span class="Apple-style-span" style="font-family: Arial; font-size: 13px; white-space: pre-wrap;">This function returns all of the individuals at a series of stations.</span>&quot;<br /><div><br /><div style="overflow-x: auto; overflow-y: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">&gt; getindsatstations<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: 
#cc66cc;">507</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">523</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span> <span style="color: #666666;"># Searched for any individuals at stations 507 and 523</span><br /> individual<em>id individual</em>name species<em>id kingdom<br /><span style="color: #cc66cc;">1</span> <span style="color: #cc66cc;">1200</span> dogwood <span style="color: #cc66cc;">12</span> Plantae<br /><span style="color: #cc66cc;">2</span> <span style="color: #cc66cc;">1197</span> purple lilac <span style="color: #cc66cc;">36</span> Plantae<br /><span style="color: #cc66cc;">3</span> <span style="color: #cc66cc;">1193</span> white <a href="http://inside-r.org/r-doc/base/t"><span style="color: #003399; font-weight: bold;">t</span></a> <span style="color: #cc66cc;">38</span> Plantae<br /><span style="color: #cc66cc;">4</span> <span style="color: #cc66cc;">3569</span> forsythia-<span style="color: #cc66cc;">1</span> <span style="color: #cc66cc;">73</span> Plantae<br /><span style="color: #cc66cc;">5</span> <span style="color: #cc66cc;">1206</span> jack <span style="color: #cc66cc;">150</span> Plantae<br /><span style="color: #cc66cc;">6</span> <span style="color: #cc66cc;">1199</span> trout lily <span style="color: #cc66cc;">161</span> Plantae<br /><span style="color: #cc66cc;">7</span> <span style="color: #cc66cc;">1198</span> dandy <span style="color: #cc66cc;">189</span> Plantae<br /><span style="color: #cc66cc;">8</span> <span style="color: #cc66cc;">1192</span> red <a href="http://inside-r.org/r-doc/base/t"><span style="color: #003399; font-weight: bold;">t</span></a> <span style="color: #cc66cc;">192</span> Plantae<br /><span style="color: #cc66cc;">9</span> <span style="color: #cc66cc;">1710</span> common lilac <span style="color: #cc66cc;">36</span> Plantae<br /><span style="color: #cc66cc;">10</span> <span style="color: #cc66cc;">1711</span> common lilac <span style="color: #cc66cc;">2</span> <span style="color: #cc66cc;">36</span> Plantae<br /><span style="color: #cc66cc;">11</span> <span style="color: #cc66cc;">1712</span> dandelion <span style="color: #cc66cc;">189</span> Plantae</pre></div></div><br /><i style="font-style: italic;"><br /></i><br /><i style="font-style: italic;"><br /></i><br /><i style="font-style: italic;"><br /></i><br /><i style="font-style: italic;"><b>Get individuals of species at stations</b></i><br />From the documentation:&nbsp;&quot;<span class="Apple-style-span" style="font-family: Arial; font-size: 13px; white-space: pre-wrap;">This function will return a list of all the individuals, which are members of a species, among &nbsp;any number of stations.</span>&quot;</div><div><br /><div style="overflow-x: auto; overflow-y: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">&gt; getindspatstations<span style="color: #009900;">(</span><span style="color: #cc66cc;">35</span><span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">60</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">259</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">2009</span><span style="color: #009900;">)</span> <span style="color: #666666;"># Search for individuals of species 35 at stations 60 and 259 in year 2009</span><br /> 
individual</em>id individual<em>name number</em>observations<br /><span style="color: #cc66cc;">1</span> <span style="color: #cc66cc;">1715</span> west <span style="color: #cc66cc;">5</span><br /><span style="color: #cc66cc;">2</span> <span style="color: #cc66cc;">1716</span> east <span style="color: #cc66cc;">5</span></pre></div></div><br /><br /><br /><i style="font-style: italic;"><br /></i><br /><i style="font-style: italic;"><br /></i><br /><i style="font-style: italic;"><br /></i><br /><i style="font-style: italic;"><b>Get observation associated with particular observation</b></i><br /><span class="Apple-style-span"><span class="Apple-style-span" style="font-style: normal;">From the documentation:</span><span class="Apple-style-span" style="font-family: Arial; font-size: x-small;"><span class="Apple-style-span" style="white-space: pre-wrap;"><span class="Apple-style-span" style="font-style: normal;">&nbsp;</span>&quot;</span></span></span><span class="Apple-style-span" style="font-family: Arial; font-size: 13px; white-space: pre-wrap;">This function will return the comment associated with a particular observation.</span><span class="Apple-style-span"><span class="Apple-style-span" style="font-family: Arial; font-size: x-small;"><span class="Apple-style-span" style="white-space: pre-wrap;">&quot;</span></span></span><br /><span class="Apple-style-span" style="font-family: Arial; font-size: x-small;"><span class="Apple-style-span" style="white-space: pre-wrap;"></span></span><br /><span class="Apple-style-span" style="font-family: monospace; white-space: pre;">&gt; getobscomm<span style="color: #009900;">(</span><span style="color: #cc66cc;">1938</span><span style="color: #009900;">)</span> <span style="color: #666666;"># The observation for observation number 1938</span></span><br /><div style="overflow-x: auto; overflow-y: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">$observation_comment<br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;some lower branches are bare&quot;</span></pre></div></div></div></p> @@ -14759,7 +15028,7 @@ install_github<span class="p">(</span><span class=" Tenure track position in systematics at the University of Vermont - 2011-08-22T13:24:00-07:00 + 2011-08-22T22:24:00+02:00 http://recology.info//2011/08/tenure-track-position-in-systematics-at <p><span class="Apple-style-span" style="background-color: white; color: #4f4f4f; font-family: Arial, Tahoma, Helvetica, FreeSans, sans-serif; font-size: 15px; line-height: 20px;"></span><br /><center style="text-align: left;">There is an awesome position opening up for an assistant professor in systematics at the University of Vermont. Below is the announcement, and see the <a href="http://currentecology.blogspot.com/2011/08/tenure-track-position-in-systematics-at.html">original post</a> at the <a href="http://currentecology.blogspot.com/">Distributed Ecology blog</a>. Why is this related to R? One can do a lot of systematics work in R, including retrieving scientific collections data through an upcoming package handshaking with <a href="http://vertnet.org/index.php">VertNet</a> (part of the <a href="http://ropensci.org/">rOpenSci</a> project), managing large data sets, retrieval of GenBank data through the ape package (see fxn read.genbank), phylogenetic reconstruction and analysis, and more. 
So I am sure a systematist with R ninja skills will surely have a head up on the rest of the field.&nbsp;</center><center style="text-align: left;"><br /></center><center style="text-align: left;"><br /></center><center><b><br /></b></center><center><b>Assistant Professor in Systematics</b></center><br /><br />Department of Biology<br />University of Vermont<br />Burlington, Vermont<br /><br />The Department of Biology of the University of Vermont seeks applications for a tenure- track Assistant Professor position in Systematics and Evolutionary Biology of arthropods, especially insects. The position will be open in the fall of 2012. The successful candidate will have expertise in classical and molecular systematics, including analysis of complex data sets. Candidates pursuing phylogenomics and innovative methods in bioinformatics in combination with taxonomy are especially encouraged to apply. Department information at:<a href="http://www.uvm.edu/~biology/" style="color: #0499fa; text-decoration: none;">&nbsp;http://www.uvm.edu/~biology/</a>.<br /><br /><br />All applicants are expected to: 1) hold a Ph.D. degree in relevant disciplines and have two or more years of postdoctoral experience; 2) develop a competitively funded research program; 3) teach undergraduate courses (chosen from among general biology, evolution, systematic entomology, and others in the candidate&#39;s expertise); 4) teach, mentor and advise undergraduate and graduate students; and 5) oversee a natural history collection of historic significance.<br /><br />Candidates must apply online:<a href="http://www.uvmjobs.com/" style="color: #0499fa; text-decoration: none;">&nbsp;http://www.uvmjobs.com/</a>. On left see &quot;Search Postings&quot; then find &quot;Biology&quot; under &quot;HCM Department&quot; then posting 0040090 (first posting). Sorry, but we cannot supply the direct link because it will time out.<br /><br />Attach a cover letter with a statement of research focus and teaching interests (one document), a curriculum vitae, representative publications, and the contact information of three references.<br /><br />Review of applications will begin on September 15, 2011, and will continue until the position is filled. Questions and up to three additional publications may be directed to Dr. Jos. J. Schall: jschall@uvm.edu.<br /><br />The University of Vermont recently identified several &quot;Spires of Excellence&quot; in which it will strategically focus institutional investments and growth over the next several years. One spire associated with the position is Complex Systems. Candidates whose research interests align with this spire are especially encouraged to apply<a href="http://www.uvm.edu/~tri/" style="color: #0499fa; text-decoration: none;">http://www.uvm.edu/~tri/</a>.<br />The University seeks faculty who can contribute to the diversity and excellence of the academic community through their research, teaching, and/or service. Applicants are requested to include in their cover letter information about how they will further this goal. The University of Vermont is an Affirmative Action/Equal Opportunity employer. 
The Department is committed to increasing faculty diversity and welcomes applications from women, underrepresented ethnic, racial and cultural groups, and from people with disabilities.</p> @@ -14768,7 +15037,7 @@ install_github<span class="p">(</span><span class=" Thursday at #ESA11 - 2011-08-12T06:11:00-07:00 + 2011-08-12T15:11:00+02:00 http://recology.info//2011/08/thursday-at-esa11 <p>Interesting talks/posters:<div><br />Richard Lankau presented research on trade-offs and competitive ability. He suggests that during range expansion selection for increased intraspecific competitive ability in older populations leads to loss of traits for interspecific competitive traits due to trade-offs between these traits. <br /><br /><br />Ellner emphatically states that rapid evolution DOES matter for ecological responses, and longer-term evolutionary patterns as well. [His paper on the talk he was giving came out prior to his talk, which he pointed out, good form sir]<br /><br /><br />Lauren Sullivan gave an interesting talk on bottom up and top down effects on plant reproduction in one site of a huge network of sites doing similar nutrient and herbivory manipulations around the globe - NutNet (go here: <a href="http://nutnet.science.oregonstate.edu/">http://nutnet.science.oregonstate.edu/</a>). <br /><br /><br />Laura Prugh shows in California that the engineering effects (i.e., the mounds that they make) of giant kangaroo rats are more important for the associated food web than the species interaction effects (the proxy used was just density of rats). <br /><br /><br />Kristy Deiner suggests that chironomids are more phylogenetic similar in lakes with stocked fish relative to fishless lakes, in high elevation lakes in the Sierra Nevada. She used barcode data to generate her phylogeny of chironomids. If you have barcode data and want to search BOLD Systems site, one option is doing it from R using rbold, a package under development at <a href="http://ropensci.org/">rOpenSci</a> (code at <a href="https://github.com/sckott/rbold">Github</a>). <br /><br /><br />Jessica Gurevitch presented a large working group&#39;s methods/approach to a set of reviews on invasion biology. We didn&#39;t get to see a lot of results from this work, but I personally was glad to see her explaining to a packed room the utility of meta-analysis, and comparing to the medical field in which meta-analysis is sort of the gold standard by which to draw conclusions. <br /><br /><br />Following Jessica, Jason Fridley told us about the Evolutionary Imbalance Hypothesis (EIH) (see my notes <a href="http://schamber.wordpress.com/lab-notebook-2/">here</a>). He posed the problem of, when two biotas come together, what determines which species are retained in this new community and which species are left out. He listed a litany of traits/responses to measure to get at this problem, but suggested that with a little bit of &quot;desktop ecology&quot;, we could simply ask: Is the invasability of X region related to the phylogenetic diversity of that region? In three destination regions (Eastern Deciduous Forests, Mediterranean California, and the Czech Republic) out of four there was a positive relationship between proportion of invasive plant species in a source region and the phylogenetic diversity of the source regions. 
<ul></ul></div></p> @@ -14777,7 +15046,7 @@ install_github<span class="p">(</span><span class=" Wednesday at #ESA11 - 2011-08-11T06:50:00-07:00 + 2011-08-11T15:50:00+02:00 http://recology.info//2011/08/wednesday-at-esa11 <p><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">Interesting talks/posters:</span><br /><div><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;"><br /></span></div><div><ul><li><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">Ethan White&#39;s poster describing <a href="http://ecologicaldata.org/">EcologicalData.org</a> was of course awesome given my interest in getting data into the hands of ecologists over at <a href="http://ropensci.org/">rOpenSci.org</a>. Ethan also has software you can download on your machine to get the datasets you want easily - <a href="http://ecologicaldata.org/ecodata-retriever">EcoData Retriever</a>. [rOpenSci will try to take advantage of their work and allow you to call the retriever from R]</span></li> <li><span class="Apple-style-span" style="font-family: Times, 'Times New Roman', serif;">Carl Boettiger&#39;s talk was awesome. He explained how we need better tools to be able to predict collapses using early warning signals. He developed a way to estimate the statistical distribution of probabilities of system collapse.&nbsp;</span></li> @@ -14792,7 +15061,7 @@ install_github<span class="p">(</span><span class=" Monday at #ESA11 - 2011-08-08T20:17:00-07:00 + 2011-08-09T05:17:00+02:00 http://recology.info//2011/08/monday-at-esa11 <p>Monday was a good day at ESA in Austin.&nbsp;There were a few topics I promised to report on in my blogging/tweeting.<br /><br /><br />...focused on open source data. <a href="http://www.nceas.ucsb.edu/~strasser/Site/Home.html">Carly Strasser&#39;s</a> presentation on guidelines for data management was awesome (including other talks in the symposium on Creating Effective Data Management Plans for Ecological Research). Although this was a good session, I can&#39;t&nbsp;help but wish that they had hammered home the need for open science more. Oh well. Also, they talked&nbsp;a lot about how, and not a lot of why we should properly curate data. Still, a good session. One issue Carly and I talked about was tracking code in versioning systems such as <a href="https://github.com/">Github</a>. There doesn&#39;t seem to be a culture of versioning code for analyses/simulations in ecology. But when we get there...it will be easier to share/track/collaborate on &nbsp;code.<br /><br />...used R software. <a href="http://eco.confex.com/eco/2011/preliminaryprogram/abstract_31982.htm">David Jennings</a>&nbsp;talked about a meta-analysis asking if phylogenetic distance influences competition strength in pairwise experiments. David used the metafor package in R to do his meta-analysis. Good form sir.<br /><br />...did cool science. <a href="http://www.ecologicalevolution.org/people/former-lab-members/matthew-helmus/">Matt Helmus </a>presented a great talk on phylogenetic species area curves (likely using R, or Matlab maybe?).<br /><br /><br />p.s. <a href="http://ropensci.org/developers/">We</a> launched <a href="http://ropensci.org/2011/07/welcome-to-ropensci/">rOpenSci</a> today.<br /><br /><br />Oddities:<br /><ul><li>The Tilman effect - Tilman&#39;s talk was so packed it looked like there was a line waiting to get into a trendy bar. 
<a href="http://www.math.ualberta.ca/~ashander/images/til.jpg">Here&#39;s a picture</a>&nbsp;(credit: Jaime Ashander). Bigger room next time anyone?&nbsp;</li> <li>Wiley came out with an open source journal called <a href="http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)2045-7758">Ecology and Evolution</a>. This brings them to 3 open source journals (the other two are in other fields). We (<a href="http://ropensci.org/">rOpenSci</a>) will attempt to hand-shake with these journals.&nbsp; </li> @@ -14804,7 +15073,7 @@ install_github<span class="p">(</span><span class=" (#ESA11) rOpenSci: a collaborative effort to develop R-based tools for facilitating Open Science - 2011-08-08T08:06:00-07:00 + 2011-08-08T17:06:00+02:00 http://recology.info//2011/08/esa11-ropensci-collaborative-effort-to_08 <p>Our&nbsp;<a href="http://ropensci.org/developers/">development team</a>&nbsp;would like to announce the launch of&nbsp;<a href="http://ropensci.org/">rOpenSci</a>. As the title states, this project aims to create&nbsp;<a href="http://www.r-project.org/">R</a>&nbsp;<a href="http://cran.r-project.org/web/packages/available_packages_by_name.html">packages</a>&nbsp;to make open science more available to researchers.<br /><br /><table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"><tbody> <tr><td style="text-align: center;"><a href="http://1.bp.blogspot.com/-9YIk1e1liUU/Tj8YAoOPyQI/AAAAAAAAElQ/9gCMQ8CJORI/s1600/ropensci.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"><img border="0" src="http://1.bp.blogspot.com/-9YIk1e1liUU/Tj8YAoOPyQI/AAAAAAAAElQ/9gCMQ8CJORI/s1600/ropensci.png" /></a></td></tr> @@ -14816,7 +15085,7 @@ install_github<span class="p">(</span><span class=" Blogging/tweeting from #ESA11 - 2011-07-31T12:46:00-07:00 + 2011-07-31T21:46:00+02:00 http://recology.info//2011/07/bloggingtweeting-from-esa11 <p>I will be blogging about the upcoming Ecological Society of America meeting in Austin, TX. I will focus on discussing talks/posters that:<br /><br /><ol><li>Have taken a cool approach to using data, or</li><li>Have focused on open science/data, or</li><li>Done&nbsp;something cool with R software, or</li><li>Are just exciting in general</li></ol><div><br /></div><div>I will also tweet throughout the meeting from @recology_ (yes the underscore is part of the name, recology was already taken).&nbsp;</div><div><br /></div><div>The hashtag for the meeting this year is #ESA11</div></p> @@ -14825,7 +15094,7 @@ install_github<span class="p">(</span><span class=" Models in Evolutionary Ecology seminar, organized by Timothee Poisot - 2011-07-18T05:27:00-07:00 + 2011-07-18T14:27:00+02:00 http://recology.info//2011/07/models-in-evolutionary-ecology-seminar <p>Here is one of the talks by Thomas Brouquet, and see the rest <a href="http://www.dailymotion.com/playlist/x1mv7l_mez_en_video_cefe-colloque#videoId=xjylea">here</a>. 
<br /><br /><br /><iframe frameborder="0" height="270" src="http://www.dailymotion.com/embed/video/xjylea" width="480"></iframe><br /><a href="http://www.dailymotion.com/video/xjylea_thomas-broquet_tech" target="_blank">Thomas Broquet</a> <i>by <a href="http://www.dailymotion.com/mez_en_video" target="_blank">mez<em>en</em>video</a></i></p> @@ -14834,7 +15103,7 @@ install_github<span class="p">(</span><span class=" Archiving ecology/evolution data sets online - 2011-07-15T19:02:00-07:00 + 2011-07-16T04:02:00+02:00 http://recology.info//2011/07/archiving-ecologyevolution-data-sets <p><span class="Apple-style-span" style="font-family: Georgia, 'Times New Roman', 'Bitstream Charter', Times, serif; font-size: 13px; line-height: 19px;"></span><br />We now have many options for archiving data sets online:<br /><br /><a data-mce-href="http://datadryad.org/" href="http://datadryad.org/" target="_blank">Dryad</a>,&nbsp;<a data-mce-href="http://knb.ecoinformatics.org/index.jsp" href="http://knb.ecoinformatics.org/index.jsp" target="_blank">KNB</a>,&nbsp;<a data-mce-href="http://www.esapubs.org/archive/" href="http://www.esapubs.org/archive/" target="_blank">Ecological Archives</a>,&nbsp;<a data-mce-href="http://www.esapubs.org/archive/archive_D.htm" href="http://www.esapubs.org/archive/archive_D.htm" target="_blank">Ecology Data Papers</a>,&nbsp;<a data-mce-href="http://ecologicaldata.org/" href="http://ecologicaldata.org/" target="_blank">Ecological Data</a>, etc.<br /><br />However, these portals largely do not communicate with one another as far as I know, and there is no way to search over all data set sources, again, as far as I know. So, I wonder if it would ease finding of all these&nbsp;different data sets to get these different sites to get their data sets cloned on a site like Infochimps, or have links from Infochimps. &nbsp;Infochimps already has APIs (and there&#39;s an R wrapper for the Infochimps API already set up here: http://cran.r-project.org/web/packages/infochimps/index.html by Drew Conway), and they have discussions set up there, etc.<br /><br />Does it make sense to post data sets linked to published works on Infochimps? I think probably not know that I think about it. But perhaps it makes sense for other data sets, or subsets of data sets that are not linked with published works to be posted there as I know at least Dryad only accepts data sets linked with published papers.<br /><br />One use case is there was a tweet from someone recently that his students were excited about getting their data sets on their resume/CV, but didn&#39;t think there was a way to put them any place where there wasn&#39;t a precondition that the data set was linked with a published work. Seems like this&nbsp;could be a good opportunity to place these datasets on Infcohimps, and at least they are available then where a lot of people are searching for data sets, etc.<br /><br />What I think would be ideal is if Dryad, KNB, etc. could link their datasets to Infochimps, where they could be found, then users can either get them from Infochimps, or perhaps you would have to go to the Dryad site, e.g. But at least you could search over all ecological data sets then.</p> @@ -14843,7 +15112,7 @@ install_github<span class="p">(</span><span class=" CRdata vs. 
Cloudnumbers - 2011-07-14T06:31:00-07:00 + 2011-07-14T15:31:00+02:00 http://recology.info//2011/07/crdata-vs-cloudnumbers <p><a href="http://www.cloudnumbers.com/">Cloudnumbers</a> and <a href="http://crdata.org/">CRdata</a> are two new cloud computing services.<br /><br /><br />I tested the two services with a very simple script. The script simply creates a dataframe of 10000 numbers via rnorm, and assigns them to a factor of one of two levels (a or b). I then take the mean of the two factor levels with the aggregate function.<br /><br /><br />In CRdata you need to put in some extra code to format the output in a browser window. For example, the last line below needs to have &#39;&lt;crdata<em>object&gt;&#39; on both sides of the output object so it can be rendered in a browser. And etc. for other things that one would print to a console. Whereas you don&#39;t need this extra code for using Cloudnumbers.<br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">&nbsp;<br />dat &lt;- <a href="http://inside-r.org/r-doc/base/data.frame"><span style="color: #003399; font-weight: bold;">data.frame</span></a><span style="color: #009900;">(</span>n = <a href="http://inside-r.org/r-doc/stats/rnorm"><span style="color: #003399; font-weight: bold;">rnorm</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">10000</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> p = <a href="http://inside-r.org/r-doc/base/rep"><span style="color: #003399; font-weight: bold;">rep</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: blue;">&#39;a&#39;</span><span style="color: #339933;">,</span><span style="color: blue;">&#39;b&#39;</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> each=<span style="color: #cc66cc;">5000</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br />&nbsp;<br />out &lt;- <a href="http://inside-r.org/r-doc/stats/aggregate"><span style="color: #003399; font-weight: bold;">aggregate</span></a><span style="color: #009900;">(</span>n ~ p<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a> = dat<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;">#&lt;crdata</em>object&gt;out&lt;/crdata_object&gt;</span></pre></div></div><br /><br />Here is a screenshot of the output from CRdata with the simple script above.<br /><img height="208" src="http://f.cl.ly/items/1D090q2N0Y3a410W262V/Screen%20shot%202011-07-14%20at%208.04.33%20AM.png" width="400" /><br />This simple script ran in about 20 seconds or so from starting the job to finishing. However, it seems like the only output option is html. Can this be right? 
This seems like a terrible only option.<br /><br /><br />In Cloudnumbers you have to start a workspace, upload your R code file.<br />Then, start a session...<br />choose your software platform...<br />choose packages (one at a time, very slow)...<br />then choose number of clusters, etc.<br />Then finally star the job.<br />Then it initializes, then finally you can open the console, and<br />Then from here it is like running R as you normally would, except on the web.<br /><br /><br /><u>Who wins (at least for our very minimal example above)</u><br /><br /><ol><li>Speed of entire process (not just running code): CRdata</li><li>Ease of use: CRdata</li><li>Cost: CRdata (free only)</li><li>Least annoying: Cloudnumbers (you don&#39;t have to add in extra code to run your own code)</li><li>Opensource: CRdata (you can use publicly available code on the site)</li><li>Long-term use: Cloudnumbers (more powerful, flexible, etc.)</li></ol><div><br /></div><div>I imagine Cloudnumbers could be faster for larger jobs, but you would have to pay for the speed of course.&nbsp;</div><div><br /></div><div>What I really want to see is a cloud computing service that accepts code directly run from R or RStudio. Hmmm...that would be so tasty indeed. I think<a href="http://cloudnumbers.zendesk.com/entries/20199198-using-an-external-terminal-ssh-console"> Cloudnumbers may be able to do this</a>, but haven&#39;t tested it yet.&nbsp;&nbsp;</div><div><br /></div><div>Perhaps using the server version of RStudio along with Amazon&#39;s EC2 is a better option than both of these. See Karthik Ram&#39;s <a href="http://inundata.org/2011/03/30/r-ec2-rstudio-server/">post</a> about using RStudio server along with Amazon&#39;s EC2. Even just running RStudio server on your Unbuntu machine or virtual machine is a pretty cool option, even without EC2 (works like a charm on my Parallels Ubuntu vm on my Mac).&nbsp;</div></p> @@ -14852,7 +15121,7 @@ install_github<span class="p">(</span><span class=" rbold: An R Interface for Bold Systems barcode repository - 2011-06-28T07:27:00-07:00 + 2011-06-28T16:27:00+02:00 http://recology.info//2011/06/rbold-an-r-interface-for-bold-systems-barcode-repository <p>Have you ever wanted to <a href="http://services.boldsystems.org/index.php?page=1_esearch&amp;status=">search</a> and <a href="http://services.boldsystems.org/index.php?page=2_efetch&amp;status=">fetch</a> barcode data from <a href="http://www.boldsystems.org/views/login.php">Bold Systems</a>?<br /><br />I am developing functions to interface with Bold from R. I just started, but hopefully folks will find it useful.<br /><br />The code is at Github <a href="https://github.com/ropensci/rbold">here</a>. The two functions are still very buggy, so please bring up issues below, or in the Issues area on Github. For example, some searches work and other similar searches don&#39;t. Apologies in advance for the bugs.<br /><br />Below is a screenshot of an example query using function getsampleids to get barcode identifiers for specimens. 
You can then use getseqs function to grab barcode data for specific specimens or many specimens.<br /><img alt="Screen shot 2011-06-28 at 9.24.00 AM.png" height="267" src="http://cl.ly/1V1y1Q1A0t062F2y2308/Screen_shot_2011-06-28_at_9.24.00_AM.png" width="400" /></p> @@ -14861,7 +15130,7 @@ install_github<span class="p">(</span><span class=" iEvoBio 2011 Synopsis - 2011-06-22T19:38:00-07:00 + 2011-06-23T04:38:00+02:00 http://recology.info//2011/06/ievobio-2011-synopsis <p>We just wrapped up the <a href="http://ievobio.org/">2011 iEvoBio meeting</a>. It was awesome! If you didn&#39;t go this year or last year, definitely think about going next year.<br /><div><br /></div><div><br /></div><div>Here is a list of the cool projects that were discussed at the meeting (apologies if I left some out):</div><div><ol><li><a href="http://vistrails.org/index.php/Main_Page">Vistrails</a>: workflow tool, awesome project by Claudio Silva</li><li><a href="http://www.commplish.com/">Commplish</a>: purpose is to use via API&#39;s, not with the web UI</li><li><a href="http://phylopic.org/">Phylopic</a>: a database of life-form silouhettes, including an API for remote access, sweet!</li><li><a href="http://gloome.tau.ac.il/">Gloome</a></li><li><a href="http://www.mappinglife.org/">MappingLife</a>: awesome geographic/etc data visualization interace on the web</li><li><a href="http://bioinfolab.unl.edu/~canderson/SuiteMSA/">SuiteSMA</a>: visualizating multiple alignments</li><li><a href="https://github.com/cboettig/treeBASE">treeBASE</a>: R interface to treebase, by <a href="http://www.carlboettiger.info/">Carl Boettiger</a></li><li><a href="http://vertnet.org/index.php">VertNet</a>: database for vertebrate natural history collections</li><li><a href="http://sourceforge.net/projects/revbayes/">RevBayes</a>: revamp of MrBayes, with GUI, etc.&nbsp;</li><li><a href="https://www.phenoscape.org/wiki/Main_Page">Phenoscape Knowledge Base</a></li><ul><li>Peter Midford lightning talk: talked about matching taxonomic and genetic data</li></ul><li><a href="http://biscicol.blogspot.com/p/biscicol-prototype.html">BiSciCol</a>: biological science collections tracker</li><li><a href="http://www.ontogrator.org/">Ontogrator</a>&nbsp;</li><li><a href="http://tnrs.iplantcollaborative.org/">TNRS</a>: taxonomic name resolution service</li><li><a href="http://www.boldsystems.org/views/login.php">Barcode of Life data systems</a>, and <a href="http://services.boldsystems.org/">remote access</a></li><li><a href="http://www.mooreabiocode.org/about">Moorea Biocode Project</a></li><li><a href="http://amarallab.mbl.edu/mirada/mirada.html">Microbial LTER&#39;s data</a></li><li><a href="http://birdvis.org/">BirdVis</a>: interactive bird data visualization (Claudio Silva&nbsp;in collaboration with Cornell Lab of Ornithology)</li><li><a href="http://www.crowdlabs.org/">Crowdlabs</a>: I think the site is down right now, another project by Claudio Silva</li><li><a href="http://hydrodictyon.eeb.uconn.edu/projects/phycas/index.php/Phycas_Home">Phycas</a>: Bayesian phylogenetics, can you just call this from R?</li><li>RIP MrBayes!!!! 
replaced by RevBayes (see 9 above)</li><li>Slides of presentations will be at&nbsp;<a href="http://www.slideshare.net/event/ievobio11">Slideshare</a> (not all presentations up yet) &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;</li><li>A birds of a feather group I was involved in proposed an idea (TOL-o-matic) like Phylomatic, but of broader scope, for easy access and submission of trees, and perhaps even social (think just pushing a &#39;SHARE&#39; button within PAUP, RevBayes, or other phylogenetics software)!&nbsp;</li><li>Synopses of Birds of a Feather discussion groups:&nbsp;<a href="http://piratepad.net/iEvoBio11-BoF-reportouts">http://piratepad.net/iEvoBio11-BoF-reportouts</a></li></ol></div></p> @@ -14870,7 +15139,7 @@ install_github<span class="p">(</span><span class=" PLoS journals API from R: "rplos" - 2011-06-21T06:30:00-07:00 + 2011-06-21T15:30:00+02:00 http://recology.info//2011/06/plos-journals-api-from-r-rplos- <p>The Public Libraries of Science (PLOS) has an API so&nbsp;that developers can create cool tools to access their data (including full text papers!!).<br /><br />Carl Boettiger at UC Davis and I are working on R functions that use the PLoS API. See our code on Github <a href="https://github.com/ropensci/rplos"><span class="Apple-style-span" style="color: red;">here</span></a>. See the wiki at the Github page for examples of use. We hope to deploy rplos as a package someday soon. Please feel free to suggest changes/additions rplos in the comments below or on the Github/rplos site.<br /><br />Get your own API key <a href="http://api.plos.org/"><span class="Apple-style-span" style="color: red;">here</span></a>.</p> @@ -14879,7 +15148,7 @@ install_github<span class="p">(</span><span class=" ggplot2 talk by Hadley Whickam at Google - 2011-06-17T09:40:00-07:00 + 2011-06-17T18:40:00+02:00 http://recology.info//2011/06/ggplot2-talk-by-hadley-whickam-at-google <iframe allowfullscreen="" frameborder="0" height="349" src="http://www.youtube.com/embed/TaxJwC_MP9Q" width="560"></iframe> @@ -14888,7 +15157,7 @@ install_github<span class="p">(</span><span class=" OpenStates from R via API: watch your elected representatives - 2011-06-10T21:19:00-07:00 + 2011-06-11T06:19:00+02:00 http://recology.info//2011/06/openstates-from-r-via-api-watch-your-elected-representatives <p>I am writing some functions to acquire data from the <a href="http://openstates.sunlightlabs.com/">OpenStates project,</a>&nbsp;via <a href="http://openstates.sunlightlabs.com/api/">their API</a>. They have <a href="http://groups.google.com/group/fifty-state-project">a great support community</a> at Google Groups as well.<br /><br />On its face this post is not obviously about ecology or evolution, but well, our elected representatives do, so to speak, hold our environment in a noose, ready to let the Earth hang any day.<br /><br />Code <a href="https://SChamberlain@github.com/SChamberlain/ropstates.git">I am developing is over at Github</a>.<br /><br />Here is an example of its use in R, in this case using the Bill Search option (billsearch.R on my Github site), and in this case you do not provide your API key in the function call, but instead put it in your .Rprofile file, which is called when you open R. 
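(A minimal sketch of that .Rprofile pattern, in case it is unfamiliar; the option name 'openstates_key' is made up here for illustration, and the actual functions in the repository may read the key in a different way.)

```r
# Keep the API key out of function calls by setting it in ~/.Rprofile,
# which R sources at startup. The option name below is hypothetical.

# In ~/.Rprofile:
# options(openstates_key = "your-api-key")

# A function can then pick the key up without it appearing in the call:
get_openstates_key <- function() {
  key <- getOption("openstates_key")
  if (is.null(key))
    stop("Set options(openstates_key = '...') in your .Rprofile")
  key
}
```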
We are searching here for the term &#39;agriculture&#39; in Texas (&#39;tx&#39;), in the &#39;upper&#39; chamber.<br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">&gt; temp &lt;- billsearch<span style="color: #009900;">(</span><span style="color: blue;">&#39;agriculture&#39;</span><span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/datasets/state"><span style="color: #003399; font-weight: bold;">state</span></a> = <span style="color: blue;">&#39;tx&#39;</span><span style="color: #339933;">,</span> chamber = <span style="color: blue;">&#39;upper&#39;</span><span style="color: #009900;">)</span><br />&nbsp;<br />&gt; <a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>temp<span style="color: #009900;">)</span><br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: #cc66cc;">21</span><br />&nbsp;<br />&gt; temp<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span><br />$title<br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;Congratulating John C. Padalino of El Paso for being appointed to the United States Department of Agriculture.&quot;</span><br />&nbsp;<br />$created<em>at<br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;2010-08-11 07:59:46&quot;</span><br />&nbsp;<br />$updated</em>at<br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;2010-09-02 03:34:39&quot;</span><br />&nbsp;<br />$chamber<br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;upper&quot;</span><br />&nbsp;<br />$state<br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;tx&quot;</span><br />&nbsp;<br />$session<br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;81&quot;</span><br />&nbsp;<br />$type<br />$type<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span><br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;resolution&quot;</span><br />&nbsp;<br />&nbsp;<br />$subjects<br />$subjects<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span><br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;Resolutions&quot;</span><br />&nbsp;<br />$subjects<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: 
#cc66cc;">2</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span><br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;Other&quot;</span><br />&nbsp;<br />&nbsp;<br />$bill<em>id<br /><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <span style="color: blue;">&quot;SR 1042&quot;</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><br />Apparently, the first bill (SR 2042, see $bill</em>id at the bottom of the list output) that came up was to congratulate John Paladino for being appointed to the USDA.<br /><br />The other function I have ready is getting basic metadata on a state, called statemetasearch.<br /><br />I plan to develop more functions for all the possible API calls to the OpenStates project.</p> @@ -14897,7 +15166,7 @@ install_github<span class="p">(</span><span class=" Just turned on 'mobile template' for this blog, let me know if it works - 2011-06-09T10:50:00-07:00 + 2011-06-09T19:50:00+02:00 http://recology.info//2011/06/just-turned-on-mobile-template-for-this-blog-let-me-know-if-it-works <div class="separator" style="clear: both; text-align: center;"><a href="http://f.cl.ly/items/0V2S0T1k3X3G2p0W2p1e/Screen%20shot%202011-06-09%20at%2012.49.15%20PM.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="161" src="http://f.cl.ly/items/0V2S0T1k3X3G2p0W2p1e/Screen%20shot%202011-06-09%20at%2012.49.15%20PM.png" width="400" /></a></div> @@ -14906,7 +15175,7 @@ install_github<span class="p">(</span><span class=" > 10,000 visits to Recology!!!! - 2011-06-09T10:09:00-07:00 + 2011-06-09T19:09:00+02:00 http://recology.info//2011/06/10000-visits-to-my-recology <p>Thanks so much everyone for stopping by!<br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://f.cl.ly/items/3m2g3b3r2o1b1k1b3x27/Screen%20shot%202011-06-09%20at%2012.07.33%20PM.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="337" src="http://f.cl.ly/items/3m2g3b3r2o1b1k1b3x27/Screen%20shot%202011-06-09%20at%2012.07.33%20PM.png" width="400" /></a></div></p> @@ -14915,7 +15184,7 @@ install_github<span class="p">(</span><span class=" How to fit power laws - 2011-06-07T13:24:00-07:00 + 2011-06-07T22:24:00+02:00 http://recology.info//2011/06/how-to-fit-power-laws <p>A new paper out in Ecology by Xiao and colleagues (in press, <a href="http://www.esajournals.org/doi/abs/10.1890/11-0538.1">here</a>) compares the use of log-transformation to non-linear regression for analyzing power-laws.<br /><br />They suggest that the error distribution should determine which method performs better. When your errors are additive, homoscedastic, and normally distributed, they propose using non-linear regression. When errors are multiplicative, heteroscedastic, and lognormally distributed, they suggest using linear regression on log-transformed data. 
The assumptions about these two methods are different, so cannot be correct for a single dataset.<br /><br />They will provide their R code for their methods once they are up on Ecological Archives (they weren&#39;t up there by the time of this post).</p> @@ -14924,7 +15193,7 @@ install_github<span class="p">(</span><span class=" searching ITIS and fetching Phylomatic trees - 2011-06-03T15:35:00-07:00 + 2011-06-04T00:35:00+02:00 http://recology.info//2011/06/searching-itis-and-fetching-phylomatic-trees <p>I am writing a set of functions to search&nbsp;ITIS for taxonomic information (more databases to come) and functions to fetch plant phylogenetic trees from Phylomatic. <a href="https://github.com/sckott/taxize_">Code at github</a>.<br /><br />Also, see the examples in the demos folder on the Github site above.<br /><br /><br /><br /><script src="https://gist.github.com/1007288.js?file=taxize_example.R"></script><br /><br /><br /><br /><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/-rcw5OIf3Hak/Telhj896L0I/AAAAAAAAEh4/p6GhpNRW6IA/s1600/examplephylogenyplot.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="http://1.bp.blogspot.com/-rcw5OIf3Hak/Telhj896L0I/AAAAAAAAEh4/p6GhpNRW6IA/s400/examplephylogenyplot.png" width="400" /></a></div></p> @@ -14933,7 +15202,7 @@ install_github<span class="p">(</span><span class=" phylogenetic signal simulations - 2011-05-18T06:13:00-07:00 + 2011-05-18T15:13:00+02:00 http://recology.info//2011/05/phylogenetic-signal-simulations <p>I did a little simulation to examine how K and lambda vary in response to tree size (and how they compare to each other on the same simulated trees). I use Liam Revell&#39;s functions fastBM to generate traits, and phylosig to measure phylogenetic signal.</p> @@ -14962,7 +15231,7 @@ install_github<span class="p">(</span><span class=" A simple function for plotting phylogenies in ggplot2 - 2011-05-17T12:37:00-07:00 + 2011-05-17T21:37:00+02:00 http://recology.info//2011/05/simple-function-for-plotting <p>UPDATE: Greg jordan has a much more elegant way of plotting trees with ggplot2. See his links in the comments below.<br /><br /><br />I wrote a simple function for plotting a phylogeny in ggplot2. However, it only handles a 3 species tree right now, as I haven&#39;t figured out how to generalize the approach to N species.<br /><br />Any ideas on how to improve this?<br /><br /><br /><br /><script src="https://gist.github.com/977207.js?file=ggtree_v1.R"></script></p> @@ -14971,7 +15240,7 @@ install_github<span class="p">(</span><span class=" plyr's idata.frame VS. data.frame - 2011-05-13T13:16:00-07:00 + 2011-05-13T22:16:00+02:00 http://recology.info//2011/05/plyrs-idataframe-vs-dataframe <hr> @@ -15108,7 +15377,7 @@ summaryBy<span class="p">(</span>rbi <span class=" google reader - 2011-05-12T11:10:00-07:00 + 2011-05-12T20:10:00+02:00 http://recology.info//2011/05/google-reader <p>I just realized that the gists code blocks don&#39;t show up in Google Reader, so you have to click the link to my blog to see the gists. 
Apologies for that!<br /><br />-S</p> @@ -15117,7 +15386,7 @@ summaryBy<span class="p">(</span>rbi <span class=" Comparison of functions for comparative phylogenetics - 2011-05-11T08:08:00-07:00 + 2011-05-11T17:08:00+02:00 http://recology.info//2011/05/comparison-of-functions-for-comparative-phylogenetics <p>With all the packages (and beta stage groups of functions) for comparative phylogenetics in R (tested here: picante, geiger, ape, motmot, Liam Revell&#39;s functions), I was simply interested in which functions to use in cases where multiple functions exist to do the same thing. I only show default settings, so perhaps these functions would differ under different parameter settings. &nbsp;[I am using a Mac 2.4 GHz i5, 4GB RAM]<br /><br />Get motmot here:&nbsp;<a href="https://r-forge.r-project.org/R/?group_id=782">https://r-forge.r-project.org/R/?group_id=782</a><br />Get Liam Revell&#39;s functions here:&nbsp;<a href="http://anolis.oeb.harvard.edu/~liam/R-phylogenetics/">http://anolis.oeb.harvard.edu/~liam/R-phylogenetics/</a></p> @@ -15134,7 +15403,7 @@ summaryBy<span class="p">(</span>rbi <span class=" RHIPE package in R for interfacing between Hadoop and R - 2011-05-04T17:11:00-07:00 + 2011-05-05T02:11:00+02:00 http://recology.info//2011/05/rhipe-package-in-r-for-interfacing-between-hadoop-and-r <p><div><strong><a href="http://www.lecturemaker.com/2011/02/rhipe/#video" title="Click link to go to the video page">RHIPE: An Interface Between Hadoop and R</a></strong><br />Presented by Saptarshi Guha</div><a href="http://www.lecturemaker.com/2011/02/rhipe/#video"><img alt="Video Link" border="0" height="169" src="http://www.lecturemaker.com/lectures/RMeetUp2010/RHIPE_Lecture.jpg" title="Click image to go to the video page" width="300" /></a> <br /><br /><br />And <a href="http://blog.piccolboni.info/2011/04/looking-for-map-reduce-language.html">this review</a> of methods for interfacing with Hadoop suggests R&#39;s RHIPE is quite nice.</p> @@ -15143,7 +15412,7 @@ summaryBy<span class="p">(</span>rbi <span class=" Treebase trees from R - 2011-05-03T08:00:00-07:00 + 2011-05-03T17:00:00+02:00 http://recology.info//2011/05/treebase-trees-from-r <p>UPDATE: See Carl Boettiger&#39;s functions/package at Github for searching Treebase <a href="https://github.com/ropensci/treeBASE">here</a>.</p> @@ -15168,7 +15437,7 @@ summaryBy<span class="p">(</span>rbi <span class=" Processing nested lists - 2011-04-28T15:41:00-07:00 + 2011-04-29T00:41:00+02:00 http://recology.info//2011/04/processing-nested-lists <p>So perhaps you have all figured this out already, but I was excited to figure out how to finally neatly get all the data frames, lists, vectors, etc. out of a nested list. It is as easy as nesting calls to the apply family of functions, in the case below, using plyr&#39;s apply like functions. 
Take this example:<br /><br /><br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R"><span style="color: #666666; font-family: monospace; font-style: italic;"># Nested lists code, an example</span><br /><span style="color: #666666; font-family: monospace; font-style: italic;"># Make a nested list</span><br />mylist <span style="font-family: monospace;">&lt;-</span> <a href="http://inside-r.org/r-doc/base/list" style="font-family: monospace;"><span style="color: #003399; font-weight: bold;">list</span></a><span style="color: #009900; font-family: monospace;">(</span><span style="color: #009900; font-family: monospace;">)</span><br />mylist_ <span style="font-family: monospace;">&lt;-</span> <a href="http://inside-r.org/r-doc/base/list" style="font-family: monospace;"><span style="color: #003399; font-weight: bold;">list</span></a><span style="color: #009900; font-family: monospace;">(</span><span style="color: #009900; font-family: monospace;">)</span><br /><span style="color: black; font-family: monospace; font-weight: bold;">for</span><span style="color: #009900; font-family: monospace;">(</span>i <span style="color: black; font-family: monospace; font-weight: bold;">in</span> <span style="color: #cc66cc; font-family: monospace;">1</span><span style="font-family: monospace;">:</span><span style="color: #cc66cc; font-family: monospace;">5</span><span style="color: #009900; font-family: monospace;">)</span> <span style="color: #009900; font-family: monospace;">{</span><br /> <span style="color: black; font-family: monospace; font-weight: bold;">for</span><span style="color: #009900; font-family: monospace;">(</span>j <span style="color: black; font-family: monospace; font-weight: bold;">in</span> <span style="color: #cc66cc; font-family: monospace;">1</span><span style="font-family: monospace;">:</span><span style="color: #cc66cc; font-family: monospace;">5</span><span style="color: #009900; font-family: monospace;">)</span> <span style="color: #009900; font-family: monospace;">{</span><br /> mylist<span style="color: #009900; font-family: monospace;">[</span><span style="color: #009900; font-family: monospace;">[</span>j<span style="color: #009900; font-family: monospace;">]</span><span style="color: #009900; font-family: monospace;">]</span> <span style="font-family: monospace;">&lt;-</span> i<span style="font-family: monospace;">*</span>j<br /> <span style="color: #009900; font-family: monospace;">}</span><br />mylist<em><span style="color: #009900; font-family: monospace;">[</span><span style="color: #009900; font-family: monospace;">[</span>i<span style="color: #009900; font-family: monospace;">]</span><span style="color: #009900; font-family: monospace;">]</span> <span style="font-family: monospace;">&lt;-</span> mylist<br /><span style="color: #009900; font-family: monospace;">}</span><br />&nbsp;<br /><span style="color: #666666; font-family: monospace; font-style: italic;"># return values from first part of list</span><br />laply<span style="color: #009900; font-family: monospace;">(</span>mylist</em><span style="color: #009900; font-family: monospace;">[</span><span style="color: #009900; font-family: monospace;">[</span><span style="color: #cc66cc; font-family: monospace;">1</span><span style="color: #009900; font-family: monospace;">]</span><span style="color: #009900; font-family: monospace;">]</span><span style="color: #339933; font-family: monospace;">,</span> <a href="http://inside-r.org/r-doc/base/identity" style="font-family: 
monospace;"><span style="color: #003399; font-weight: bold;">identity</span></a><span style="color: #009900; font-family: monospace;">)</span><br />[1] 1 2 3 4 5</pre><pre class="r geshifilter-R">&nbsp;<br /><span style="color: #666666; font-family: monospace; font-style: italic;"># return all values</span><br />laply<span style="color: #009900; font-family: monospace;">(</span>mylist<em><span style="color: #339933; font-family: monospace;">,</span> <a href="http://inside-r.org/r-doc/base/function" style="font-family: monospace;"><span style="color: #003399; font-weight: bold;">function</span></a><span style="color: #009900; font-family: monospace;">(</span>x<span style="color: #009900; font-family: monospace;">)</span> laply<span style="color: #009900; font-family: monospace;">(</span>x<span style="color: #339933; font-family: monospace;">,</span> <a href="http://inside-r.org/r-doc/base/identity" style="font-family: monospace;"><span style="color: #003399; font-weight: bold;">identity</span></a><span style="color: #009900; font-family: monospace;">)</span><span style="color: #009900; font-family: monospace;">)</span><br /> 1 2 3 4 5<br />[1,] 1 2 3 4 5<br />[2,] 2 4 6 8 10<br />[3,] 3 6 9 12 15<br />[4,] 4 8 12 16 20<br />[5,] 5 10 15 20 25</pre><pre class="r geshifilter-R" style="font-family: monospace;">&nbsp;<br /><span style="color: #666666; font-style: italic;"># perform some function, in this case sqrt of each value</span><br />laply<span style="color: #009900;">(</span>mylist</em><span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/base/function"><span style="color: #003399; font-weight: bold;">function</span></a><span style="color: #009900;">(</span>x<span style="color: #009900;">)</span> laply<span style="color: #009900;">(</span>x<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/base/function"><span style="color: #003399; font-weight: bold;">function</span></a><span style="color: #009900;">(</span>x<span style="color: #009900;">)</span> <a href="http://inside-r.org/r-doc/base/sqrt"><span style="color: #003399; font-weight: bold;">sqrt</span></a><span style="color: #009900;">(</span>x<span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span></pre><pre class="r geshifilter-R"><span style="color: #009900;"> </span><br /> 1 2 3 4 5<br />[1,] 1.000000 1.414214 1.732051 2.000000 2.236068<br />[2,] 1.414214 2.000000 2.449490 2.828427 3.162278<br />[3,] 1.732051 2.449490 3.000000 3.464102 3.872983<br />[4,] 2.000000 2.828427 3.464102 4.000000 4.472136<br />[5,] 2.236068 3.162278 3.872983 4.472136 5.000000<br /></pre></div></div><br /><br /><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a></p> @@ -15177,7 +15446,7 @@ summaryBy<span class="p">(</span>rbi <span class=" Running Phylip's contrast application for trait pairs from R - 2011-04-26T05:40:00-07:00 + 2011-04-26T14:40:00+02:00 http://recology.info//2011/04/running-phylip-s-contrast-application-for-trait-pairs-from-r <p>Here is some code to run Phylip&#39;s contrast application from R and get the output within R to easily manipulate yourself. Importantly, the code is written specifically for trait pairs only as the regular expression work in the code specifically grabs data from contast results when only two traits are input. You could easily change the code to do N traits. 
Note that the p-value calculated for the chi-square statistic is not output from contrast, but is calculated within the function &#39;PhylipWithinSpContr&#39;. In the code below there are two functions that make&nbsp;a lot of busy work easier: &#39;WritePhylip&#39; and&nbsp;&#39;PhylipWithinSpContr&#39;. The first function is nice because the formatting required for data input to Phylip programs is so, well, awkward &nbsp;- and this function does it for you. The second function runs contrast and retrieves the output data. The example data set I produce in the code below has multiple individuals per species, so that contrasts are calculated taking into account within species variation. Get Phylip&#39;s contrast documentation <a href="http://evolution.genetics.washington.edu/phylip/doc/contrast.html">here</a>.<br /><br />Note that the data input format allows only 10 characters for the species name, so I suggest if your species names are longer than 10 characters use the function abbreviate, for example, to shorten all names to no longer than 10 characters. Also, within the function WritePhylip I concatenate species names and their number of individuals per species leaving plenty of space.<br /><br />Also, mess around with the options in the &quot;system&quot; call to get what you want. For example, I used &quot;R&quot;, &quot;W&quot; and &quot;Y&quot;, meaning replace old outfile (R), then turn on within species analyses (W), then accept all options (Y). E..g, if you don&#39;t have an old outfile, then you obviously don&#39;t need to replace the old file with the &quot;R&quot; command.<br /><br />(p.s. I have not tried this on a windows machine).<br /><br /><br /><script src="https://gist.github.com/942176.js?file=phylip_fromR.R"></script><br /><br />Here is example output:<br /><br /><span class="Apple-style-span" style="font-family: Monaco; font-size: 12px; line-height: 17px; white-space: pre-wrap;"></span><br /><pre class="G1dpdwhmFL" style="border-bottom-style: none; border-color: initial; border-left-style: none; border-right-style: none; border-top-style: none; border-width: initial; font-family: Monaco; font-size: 9pt !important; line-height: 1.45; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; outline-color: initial; outline-style: none; outline-width: initial; white-space: pre-wrap !important;" tabindex="0"><span class="G1dpdwhmIL ace_keyword" style="white-space: pre;">&gt; </span><span class="G1dpdwhmMK ace_keyword">datout<br /></span> names2 dat...1. 
dat...2.<br />1 VarAIn<em>VarAest 0.000110 -0.000017<br />2 VarAIn</em>VarAest -0.000017 0.000155<br />3 VarAIn<em>VarEest 0.790783 -0.063097<br />4 VarAIn</em>VarEest -0.063097 0.981216<br />5 VarAIn<em>VarAreg 1.000000 -0.107200<br />6 VarAIn</em>VarAreg -0.151800 1.000000<br />7 VarAIn<em>VarAcorr 1.000000 -0.127600<br />8 VarAIn</em>VarAcorr -0.127600 1.000000<br />9 VarAIn<em>VarEreg 1.000000 -0.064300<br />10 VarAIn</em>VarEreg -0.079800 1.000000<br />11 VarAIn<em>VarEcorr 1.000000 -0.071600<br />12 VarAIn</em>VarEcorr -0.071600 1.000000<br />13 VarAOut<em>VarEest 0.790734 -0.063104<br />14 VarAOut</em>VarEest -0.063104 0.981169<br />15 VarAOut<em>VarEreg 1.000000 -0.064300<br />16 VarAOut</em>VarEreg -0.079800 1.000000<br />17 VarAOut<em>VarEcorr 1.000000 -0.071600<br />18 VarAOut</em>VarEcorr -0.071600 1.000000<br />19 logL<em>withvar</em>df -68.779770 6.000000<br />20 logL<em>withoutvar</em>df -68.771450 3.000000<br />21 chisq<em>df -0.016640 3.000000<br />22 chisq</em>p 1.000000 -999.000000</pre></p> @@ -15186,7 +15455,7 @@ summaryBy<span class="p">(</span>rbi <span class=" Phylometa from R: Randomization via Tip Shuffle - 2011-04-16T11:44:00-07:00 + 2011-04-16T20:44:00+02:00 http://recology.info//2011/04/phylometa-from-r-randomization-via-tip-shuffle <p>---UPDATE: I am now using code formatting from gist.github, so I replaced the old prettyR code (sorry guys). The github way is much easier and prettier. I hope readers like the change.<br /><br /><br /><br /><br /><a href="http://r-ecology.blogspot.com/2011/04/phylometa-from-r-udpate.html">I wrote earlier</a> about some code I wrote for running Phylometa (software to do phylogenetic meta-analysis) from R.<br /><br />I have been concerned about what exactly is the right penalty for including phylogeny in a meta-analysis. E.g.: AIC is calculated from Q in Phylometa, and Q increases with tree size.<br /><br />So, I wrote some code to shuffle the tips of your tree N number of times, run Phylometa, and extract just the &quot;Phylogenetic MA&quot; part of the output. So, we compare the observed output (without tip shuffling) to the distribution of the tip shuffled output, and we can calculate a P-value from that. The code I wrote simply extracts the pooled effect size for fixed and also random-effects models. But you could change the code to extract whatever you like for the randomization.<br /><br />I think the point of this code is not to estimate your pooled effects, etc., but may be an alternative way to compare traditional to phylogenetic MA where hopefully simply incorporating a tree is not penalizing the meta-analysis so much&nbsp;that you will&nbsp;always accept the traditional MA as better.<br /><br />Get the code <a href="https://gist.github.com/925343#file_phylometa_rand_fxn_one.r">here</a>, and also below. Get the example <a href="http://wp.me/PRT1F-2R">tree file</a> and <a href="http://wp.me/PRT1F-2S">data file</a>, named &quot;phylogeny.txt&quot; and &quot;metadata<em>2g.txt&quot;, respectively below (or use your own data!). You need the file &quot;phylometa</em>fxn.r&quot; from my website, get <a href="https://gist.github.com/939970">here</a>, but just call it using source as seen below.<br /><br /><script src="https://gist.github.com/925343.js?file=phylometa_rand_fxn_one.R"></script><br /><br />As you can see, the observed values fall well within the distribution of values obtained from shuffling tips. &nbsp;P-values were 0.64 and 0.68 for fixed- and random-effects MA&#39;s, respectively. 
This suggests, to me at least, that the traditional (distribution of tip shuffled analyses, the histograms below) and phylogenetic (red lines) MA&#39;s are not&nbsp;different. The way I would use this is as an additional analysis to the actual Phylometa output.<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/-fpEjXUBvAw8/TanftVw49QI/AAAAAAAAEbY/z9rJKThxRMo/s1600/metadata_2g.txt.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="400" src="http://4.bp.blogspot.com/-fpEjXUBvAw8/TanftVw49QI/AAAAAAAAEbY/z9rJKThxRMo/s400/metadata_2g.txt.jpeg" width="400" /></a></div></p> @@ -15195,7 +15464,7 @@ summaryBy<span class="p">(</span>rbi <span class=" RStudio Beta 2 is Out! - 2011-04-11T07:33:00-07:00 + 2011-04-11T16:33:00+02:00 http://recology.info//2011/04/rstudio-beta-2-is-out- <p><a href="http://blog.rstudio.org/2011/04/11/rstudio-beta2/">RStudio Beta 2 (v0.93) « RStudio Blog</a><div><br /></div><div>A new beta version of RStudio is out! </div></p> @@ -15204,7 +15473,7 @@ summaryBy<span class="p">(</span>rbi <span class=" Adjust branch lengths with node ages: comparison of two methods - 2011-04-10T11:02:00-07:00 + 2011-04-10T20:02:00+02:00 http://recology.info//2011/04/adjust-branch-lengths-with-node-ages-comparison-of-two-methods <p>Here is an approach for comparing two methods of adjusting branch lengths on trees: bladj in the program Phylocom and a fxn written by Gene Hunt at the Smithsonian.<br /><br />Get the code and example files (tree and node ages) <a href="https://gist.github.com/938313">here</a><br />Get phylocom here:&nbsp;<a href="http://www.phylodiversity.net/phylocom/">http://www.phylodiversity.net/phylocom/</a><br /><br />Gene Hunt&#39;s method has many options you can mess with, including setting tip ages (not available in bladj), setting node ages, and minimum branch length imposed. You will notice that Gene&#39;s method may be not the appropriate if you only have extant taxa.<br /><br />The function AdjBrLens uses as input a newick tree file and a text file of node ages, and uses functions you can simply run by &quot;source&quot; the R file bladjing<em>twomethods.R file from <a href="https://gist.github.com/938313">here</a>.<br /><br />Note that blad does not like numbers for node names, so you have to put a character in front of a number of just character names for nodes.<br /><br /><br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"><span style="color: #666666; font-style: italic;"># This is where the work happens... </span><br /><span style="color: #666666; font-style: italic;"># Directory below needs to have at least three items:</span><br /><span style="color: #666666; font-style: italic;"># 1. phylocom executable for windows or mac</span><br /><span style="color: #666666; font-style: italic;"># 2. tree newick file</span><br /><span style="color: #666666; font-style: italic;"># 3. 
node ages file as required by phylocom, see their manual</span><br /><span style="color: #666666; font-style: italic;"># Output: trees</em>out is a list of three trees, the original, bladj, and Gene Hunt&#39;s method</span><br /><span style="color: #666666; font-style: italic;"># Also, within the function all three trees are written to file as PDFs</span><br /><a href="http://inside-r.org/r-doc/base/setwd"><span style="color: #003399; font-weight: bold;">setwd</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;/Mac/R<em>stuff/Blog</em>etc/Bladjing&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># set working directory</span><br /><a href="http://inside-r.org/r-doc/base/source"><span style="color: #003399; font-weight: bold;">source</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;bladjing<em>twomethods.R&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># run functions from source file</span><br />trees</em>out &lt;- AdjBrLens<span style="color: #009900;">(</span><span style="color: blue;">&quot;tree.txt&quot;</span><span style="color: #339933;">,</span> <span style="color: blue;">&quot;nodeages.txt&quot;</span><span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># plot trees of three methods together, </span><br /><span style="color: #666666; font-style: italic;"># with nodes with age estimates labeled</span><br /><a href="http://inside-r.org/r-doc/grDevices/jpeg"><span style="color: #003399; font-weight: bold;">jpeg</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;threeplots.jpeg&quot;</span><span style="color: #339933;">,</span> quality=<span style="color: #cc66cc;">100</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/graphics/layout"><span style="color: #003399; font-weight: bold;">layout</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/matrix"><span style="color: #003399; font-weight: bold;">matrix</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">3</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">3</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/graphics/plot"><span style="color: #003399; font-weight: bold;">plot</span></a><span style="color: #009900;">(</span>trees<em>out<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span><br />nodelabels<span style="color: #009900;">(</span>trees</em>out<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span>$node.label<span style="color: #339933;">,</span> cex = <span style="color: #cc66cc;">0.6</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/graphics/title"><span style="color: #003399; font-weight: bold;">title</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;original tree&quot;</span><span style="color: 
#009900;">)</span><br /><a href="http://inside-r.org/r-doc/graphics/plot"><span style="color: #003399; font-weight: bold;">plot</span></a><span style="color: #009900;">(</span>trees<em>out<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">2</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span><br />nodelabels<span style="color: #009900;">(</span>trees</em>out<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">2</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span>$node.label<span style="color: #339933;">,</span> cex = <span style="color: #cc66cc;">0.6</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/graphics/title"><span style="color: #003399; font-weight: bold;">title</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;bladj method&quot;</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/graphics/plot"><span style="color: #003399; font-weight: bold;">plot</span></a><span style="color: #009900;">(</span>trees<em>out<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">3</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span><br />nodelabels<span style="color: #009900;">(</span>trees</em>out<span style="color: #009900;">[</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">3</span><span style="color: #009900;">]</span><span style="color: #009900;">]</span>$node.label<span style="color: #339933;">,</span> cex = <span style="color: #cc66cc;">0.6</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/graphics/title"><span style="color: #003399; font-weight: bold;">title</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;gene hunt method, scalePhylo&quot;</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/grDevices/dev.off"><span style="color: #003399; font-weight: bold;">dev.off</span></a><span style="color: #009900;">(</span><span style="color: #009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><br />It is sort of hard to see the differences in&nbsp;the branch length changes here, but the individual output trees will reveal the differences better.<br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/-tLK1y12TYlI/TaHwayCs3GI/AAAAAAAAEbU/rPsFYqSEDuI/s1600/threeplots.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" src="http://2.bp.blogspot.com/-tLK1y12TYlI/TaHwayCs3GI/AAAAAAAAEbU/rPsFYqSEDuI/s1600/threeplots.jpeg" /></a></div></p> @@ -15213,7 +15482,7 @@ summaryBy<span class="p">(</span>rbi <span class=" Phylometa from R - UDPATE - 2011-04-01T14:18:00-07:00 + 2011-04-01T23:18:00+02:00 http://recology.info//2011/04/phylometa-from-r-udpate <p><a href="http://r-ecology.blogspot.com/2010/12/phylogenetic-meta-analysis-in-r-using.html">A while back I posted some messy code to run Phylometa from R</a>, especially useful for processing the output data from Phylometa which is not easily done. The code is still quite messy, but it should work now. 
I have run the code with tens of different data sets and phylogenies, so it should work. <br /><div><br /></div><div>I fixed errors when parentheses came up against numbers in the output, and other things. You can use the code for up to 4 levels of your grouping variable. In addition, there are some lines of code to plot the effect sizes with confidence intervals, comparing random and fixed effects models and phylogenetic and traditional models.&nbsp;</div><div><br /></div><div>Get the code at my website:</div><div>-<span class="Apple-style-span" style="color: #333333; font-family: Georgia, 'Bitstream Charter', serif; font-size: 14px; line-height: 23px;"><a href="https://gist.github.com/939970" style="border-bottom-width: 0px; border-color: initial; border-left-width: 0px; border-right-width: 0px; border-style: initial; border-top-width: 0px; color: #1c9bdc; font-style: inherit; font-weight: inherit; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; outline-color: initial; outline-style: initial; outline-width: 0px; padding-bottom: 0px; padding-left: 0px; padding-right: 0px; padding-top: 0px; text-decoration: underline; vertical-align: baseline;" target="_blank">phylometa_fxn.R</a></span></div><div>-<span class="Apple-style-span" style="color: #333333; font-family: Georgia, 'Bitstream Charter', serif; font-size: 14px; line-height: 23px;"><a href="https://gist.github.com/939971" style="border-bottom-width: 0px; border-color: initial; border-left-width: 0px; border-right-width: 0px; border-style: initial; border-top-width: 0px; color: #1c9bdc; font-style: inherit; font-weight: inherit; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px; outline-color: initial; outline-style: initial; outline-width: 0px; padding-bottom: 0px; padding-left: 0px; padding-right: 0px; padding-top: 0px; text-decoration: underline; vertical-align: baseline;" target="_blank">phylometa_inR_run.R</a></span></div><div>- Use the first file to do the work, calling the second file using source().<br />- This new code works with Marc&#39;s new version of Phylometa, so please update:&nbsp;http://lajeunesse.myweb.usf.edu/publications.html</div><div><br /></div><div>Again, please let me know if it doesn&#39;t work, if it&#39;s worthless, and what changes could make it better.<br /><br />Some notes on tree formatting for Phylometa:<br />1.&nbsp;Trees cannot have node labels - remove them (e.g., tree$node.label &lt;- NULL).<br />2. Trees cannot have zero-length branches. This may seem like a non-problem, but it can come up, for example, if you have resolved polytomies and zero-length branches were added in the process.<br />3. I think you cannot have a branch length on the root branch.<br /><br /></div></p> @@ -15222,7 +15491,7 @@ summaryBy<span class="p">(</span>rbi <span class=" Bio-ORACLE - 2011-03-25T06:38:00-07:00 + 2011-03-25T14:38:00+01:00 http://recology.info//2011/03/bio-oracle <p><a href="http://www.oracle.ugent.be/index.html">Bio-ORACLE</a><div><br /></div><div>A new dataset of geophysical, biotic, and climate data is available. Should be fun to play with in R.
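As a rough starting point for playing with it in R, here is a sketch assuming you have downloaded one of the Bio-ORACLE layers as a GeoTIFF; the file name is made up:

```r
library(raster)
# "sst_mean.tif" is a placeholder for whichever Bio-ORACLE layer you grabbed
sst <- raster("sst_mean.tif")   # read the gridded layer
plot(sst)                       # quick map of the layer
cellStats(sst, mean)            # e.g., its global mean value
```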
</div></p> @@ -15231,7 +15500,7 @@ summaryBy<span class="p">(</span>rbi <span class=" basic ggplot2 network graphs ver2 - 2011-03-23T12:09:00-07:00 + 2011-03-23T20:09:00+01:00 http://recology.info//2011/03/basic-ggplot2-network-graphs-ver2 <hr> @@ -15382,7 +15651,7 @@ gggraph<span class="p">(</span>z<span class="p&quo basic ggplot2 network graphs - 2011-03-17T06:35:00-07:00 + 2011-03-17T14:35:00+01:00 http://recology.info//2011/03/basic-ggplot2-network-graphs <p>I have been looking around on the web and have not found anything yet related to using ggplot2 for making graphs/networks. I put together a few functions to make very simple graphs. The bipartite function especially is not ideal, as of course we only want to allow connections between unlike nodes, not all nodes. These functions do not, obviously, take full advantage of the power of ggplot2, but&nbsp;it’s a start.</p> @@ -15402,7 +15671,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Species abundance distributions and basketball - 2011-03-13T13:00:00-07:00 + 2011-03-13T21:00:00+01:00 http://recology.info//2011/03/species-abundance-distributions-and <p>A post over at the Phased blog (http://www.nasw.org/users/mslong/) highlights a recent paper in PLoS One by Robert Warren et al. Similar results were obtained in a 2007 Ecology Letters paper by Nekola and Brown, who showed that abundance distributions found in ecology are similar to those found for scientific citations, Eastern North American precipitation, among other things. A similar argument was made by Nee et al. in 1991 (in the journal PRSL-B). The author of the blog appears to agree with the outcome of the Warren et al. study.<br /><br />I tend to disagree.<br /><br />In the field of graphs/networks, many networks (social, sexual intercourse among humans, etc.) are found to have similar statistical properties to those of ecological networks (food webs, interactions among mutualists, etc.). However, just because these networks have similar statistical properties does not mean that the statistical properties of ecological networks have no biological meaning.<br /><br />They make the argument that the common SAD fit may be an artifact of large data sets alone. However, I don&#39;t see any explanation of why they think large data sets is a valid explanation of SADs. Surely SAD&#39;s are fit to varying sizes of datasets. The problem with small datasets is lack of statistical power to detect a particular pattern, but surely you can get a fit for a particular SAD to a small dataset.<br /><br />There are ecological mechanistic theories behind different SAD models. They argue that because very similar SADs are found in ecological and non-ecological datasets alike one option is that a universal mechanism structures ecological and non-ecological data (with the mechanism unknown in both). Why can&#39;t the same SAD pattern be generated by different mechanisms? <br /><br />Are Warren et al, Nekola, and Nee right in questioning the utility of SADs? 
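On the point that fitting is not the bottleneck for small datasets, a toy example with made-up counts (using vegan) shows that an SAD model fits even a tiny sample; what a small sample mostly costs you is power, not the ability to estimate the model:

```r
library(vegan)
abund <- c(40, 12, 8, 5, 3, 2, 1, 1, 1)  # a tiny, made-up 9-species sample
fisherfit(abund)                         # Fisher's log-series still fits (gives an alpha)
radfit(abund)                            # rank-abundance models (incl. lognormal) for comparison
```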
Questioning our theories and ideas only makes the theories better in the end by weeding out shortcomings, etc.<br /><br /><br /><span style="float: left; padding-bottom: 5px; padding-left: 5px; padding-right: 5px; padding-top: 5px;"><a href="http://www.researchblogging.org/"><img alt="ResearchBlogging.org" src="http://www.researchblogging.org/public/citation_icons/rb2_large_white.png" style="border: 0;" /></a></span><br /><span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.jtitle=PLoS+ONE&amp;rft_id=info%3Adoi%2F10.1371%2Fjournal.pone.0017342&amp;rfr_id=info%3Asid%2Fresearchblogging.org&amp;rft.atitle=Universal+Ecological+Patterns+in+College+Basketball+Communities&amp;rft.issn=1932-6203&amp;rft.date=2011&amp;rft.volume=6&amp;rft.issue=3&amp;rft.spage=0&amp;rft.epage=&amp;rft.artnum=http%3A%2F%2Fdx.plos.org%2F10.1371%2Fjournal.pone.0017342&amp;rft.au=Warren%2C+R.&amp;rft.au=Skelly%2C+D.&amp;rft.au=Schmitz%2C+O.&amp;rft.au=Bradford%2C+M.&amp;rfe_dat=bpr3.included=1;bpr3.tags=Ecology+%2F+Conservation">Warren, R., Skelly, D., Schmitz, O., &amp; Bradford, M. (2011). Universal Ecological Patterns in College Basketball Communities <span style="font-style: italic;">PLoS ONE, 6</span> (3) DOI: <a href="http://dx.doi.org/10.1371/journal.pone.0017342" rev="review">10.1371/journal.pone.0017342</a></span></p> @@ -15411,7 +15680,7 @@ gggraph<span class="p">(</span>z<span class="p&quo cloudnumbers.com - 2011-03-11T05:28:00-08:00 + 2011-03-11T14:28:00+01:00 http://recology.info//2011/03/cloudnumbers-com <p>UPDATE: I guess it still is not actually available. Bummer...<br /><br /><br /><br />Has anyone used cloudnumbers.com?<br /><br />http://www.cloudnumbers.com/<br /><br />They provide cloud computing, and have built in applications, including R.<br /><br />How well does it work? Does it increase processing speed? I guess it may at the least free up RAM and processor space on your own machine.</p> @@ -15420,7 +15689,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Five ways to visualize your pairwise comparisons - 2011-03-05T10:49:00-08:00 + 2011-03-05T19:49:00+01:00 http://recology.info//2011/03/for-all-your-pairwise-comparison-needs <p>UPDATE: <i><u>At the bottom are two additional methods, and some additions (underlined) are added to the original 5 methods. Thanks for all the feedback...</u></i><br /><i><u>-Also, another post <a href="http://www.r-statistics.com/2010/04/correlation-scatter-plot-matrix-for-ordered-categorical-data/">here</a> about ordered-categorical data</u></i><br /><i><u>-Also #2, a method combining splom and hexbin packages <a href="http://procomun.wordpress.com/2011/03/18/splomr/">here</a>, for larger datasets</u></i><br /><br /><br />In data analysis it is often nice to look at all pairwise combinations of continuous variables in scatterplots. 
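One note relating to the ggplot2 option among the methods listed below: plotmatrix has been dropped from more recent ggplot2 releases, and GGally::ggpairs is the commonly suggested replacement; a minimal sketch, assuming GGally is installed:

```r
# roughly the plotmatrix(iris[1:4]) call from method 3 below, under current ggplot2
library(GGally)
ggpairs(iris[, 1:4])  # scatterplots below, densities on, correlations above the diagonal
```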
Up until recently, I&nbsp;have used the function splom in the package lattice, but ggplot2 has superior aesthetics, I think anyway.<br /><br />Here a few ways to accomplish the task:<br /><br /># load packages<br /><pre class="r geshifilter-R" style="font-family: monospace; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><a href="http://inside-r.org/r-doc/base/require"><span style="color: #003399; font-weight: bold;">require</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/packages/cran/lattice">lattice</a><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/require"><span style="color: #003399; font-weight: bold;">require</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/packages/cran/ggplot2">ggplot2</a><span style="color: #009900;">)</span>&nbsp;<br /></pre><div><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"><a href="http://inside-r.org/r-doc/base/require"><span style="color: #003399; font-weight: bold;">require</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/packages/cran/car">car</a><span style="color: #009900;">)</span></pre></div></div><br /></div><br />1) Using base graphics, function &quot;pairs&quot;<br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"><a href="http://inside-r.org/r-doc/graphics/pairs"><span style="color: #003399; font-weight: bold;">pairs</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #339933;">,</span> pch = <span style="color: #cc66cc;">21</span><span style="color: #009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="https://lh3.googleusercontent.com/-QrjiX4jAPxo/TXJ-Qj0mpfI/AAAAAAAAEac/DQkKhzJgl7c/s1600/pairs.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="https://lh3.googleusercontent.com/-QrjiX4jAPxo/TXJ-Qj0mpfI/AAAAAAAAEac/DQkKhzJgl7c/s400/pairs.png" width="400" /></a></div><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br />2) Using lattice package, function &quot;splom&quot;<br /><br />-<u>Additional code to improve splom plots <a href="http://www.mail-archive.com/r-help@stat.math.ethz.ch/msg94527.html">here</a> (and see Oscar&#39;s code below in comments)</u><br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"><a href="http://inside-r.org/r-doc/lattice/splom"><span style="color: #003399; font-weight: bold;">splom</span></a><span style="color: #009900;">(</span>~<a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span></pre></div></div><a 
href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="https://lh6.googleusercontent.com/-boKmTPP2s60/TXKAc4YcbCI/AAAAAAAAEak/ImCWlnGkpmc/s1600/splom.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="https://lh6.googleusercontent.com/-boKmTPP2s60/TXKAc4YcbCI/AAAAAAAAEak/ImCWlnGkpmc/s400/splom.png" width="400" /></a></div><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br />3) Using package ggplot2, function &quot;plotmatrix&quot;<br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">plotmatrix<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="https://lh4.googleusercontent.com/-3aqCYfmVpNw/TXJ-RG6Zs8I/AAAAAAAAEag/ADZRrR8QB8g/s1600/plotmatrix.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="https://lh4.googleusercontent.com/-3aqCYfmVpNw/TXJ-RG6Zs8I/AAAAAAAAEag/ADZRrR8QB8g/s400/plotmatrix.png" width="400" /></a></div><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br />4) a function called ggcorplot by Mike Lawrence at Dalhousie University<br /><br />-get ggcorplot function at <a href="http://groups.google.com/group/ggplot2/attach/6bf632a9718dddd6/ggcorplot.R?part=2">this link</a><br />-<u>ggcorplot is also built in to Deducer (<a href="http://www.deducer.org/pmwiki/pmwiki.php?n=Main.DeducerManual">get here</a>); see Ian&#39;s code below in the comments</u><br />-<u>Lastly, an improved version of ggcorplot is built in to the ez package (<a href="http://cran.r-project.org/web/packages/ez/index.html">get here</a>)</u><br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">ggcorplot<span style="color: #009900;">(</span><br /> <a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a> = <a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #339933;">,</span><br /> var<em>text</em>size = <span style="color: #cc66cc;">5</span><span style="color: #339933;">,</span><br /> cor<em>text</em>limits = <a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">5</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">10</span><span style="color: #009900;">)</span><span style="color: 
#009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="https://lh6.googleusercontent.com/-jw2x1p68lp0/TXJ-Qs3JgwI/AAAAAAAAEaY/42UfLSleVHc/s1600/ggcorplot.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="https://lh6.googleusercontent.com/-jw2x1p68lp0/TXJ-Qs3JgwI/AAAAAAAAEaY/42UfLSleVHc/s400/ggcorplot.png" width="400" /></a></div><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br />5) panel.cor function using pairs, similar to ggcorplot, but using base graphics. Not sure who wrote this function, but <a href="http://addictedtor.free.fr/graphiques/graphcode.php?graph=137">here</a> is where I found it.<br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"></pre></div></div><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">panel.cor &lt;- <a href="http://inside-r.org/r-doc/base/function"><span style="color: #003399; font-weight: bold;">function</span></a><span style="color: #009900;">(</span>x<span style="color: #339933;">,</span> y<span style="color: #339933;">,</span> digits=<span style="color: #cc66cc;">2</span><span style="color: #339933;">,</span> prefix=<span style="color: blue;">&quot;&quot;</span><span style="color: #339933;">,</span> cex.cor<span style="color: #009900;">)</span> <br /><span style="color: #009900;">{</span><br /> usr &lt;- <a href="http://inside-r.org/r-doc/graphics/par"><span style="color: #003399; font-weight: bold;">par</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;usr&quot;</span><span style="color: #009900;">)</span><span style="color: #339933;">;</span> <a href="http://inside-r.org/r-doc/base/on.exit"><span style="color: #003399; font-weight: bold;">on.exit</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/graphics/par"><span style="color: #003399; font-weight: bold;">par</span></a><span style="color: #009900;">(</span>usr<span style="color: #009900;">)</span><span style="color: #009900;">)</span> <br /> <a href="http://inside-r.org/r-doc/graphics/par"><span style="color: #003399; font-weight: bold;">par</span></a><span style="color: #009900;">(</span>usr = <a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">1</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span> <br /> r &lt;- <a href="http://inside-r.org/r-doc/base/abs"><span style="color: #003399; font-weight: bold;">abs</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/stats/cor"><span style="color: #003399; font-weight: bold;">cor</span></a><span style="color: #009900;">(</span>x<span style="color: #339933;">,</span> y<span style="color: #009900;">)</span><span style="color: #009900;">)</span> <br /> txt &lt;- <a href="http://inside-r.org/r-doc/base/format"><span style="color: 
#003399; font-weight: bold;">format</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span>r<span style="color: #339933;">,</span> <span style="color: #cc66cc;">0.123456789</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> digits=digits<span style="color: #009900;">)</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span> <br /> txt &lt;- <a href="http://inside-r.org/r-doc/base/paste"><span style="color: #003399; font-weight: bold;">paste</span></a><span style="color: #009900;">(</span>prefix<span style="color: #339933;">,</span> txt<span style="color: #339933;">,</span> sep=<span style="color: blue;">&quot;&quot;</span><span style="color: #009900;">)</span> <br /> <span style="color: black; font-weight: bold;">if</span><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/missing"><span style="color: #003399; font-weight: bold;">missing</span></a><span style="color: #009900;">(</span>cex.cor<span style="color: #009900;">)</span><span style="color: #009900;">)</span> cex &lt;- <span style="color: #cc66cc;">0.8</span>/strwidth<span style="color: #009900;">(</span>txt<span style="color: #009900;">)</span> <br />&nbsp;<br /> test &lt;- <a href="http://inside-r.org/r-doc/stats/cor.test"><span style="color: #003399; font-weight: bold;">cor.test</span></a><span style="color: #009900;">(</span>x<span style="color: #339933;">,</span>y<span style="color: #009900;">)</span> <br /> <span style="color: #666666; font-style: italic;"># borrowed from printCoefmat</span><br /> Signif &lt;- <a href="http://inside-r.org/r-doc/stats/symnum"><span style="color: #003399; font-weight: bold;">symnum</span></a><span style="color: #009900;">(</span>test$p.value<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/boot/corr"><span style="color: #003399; font-weight: bold;">corr</span></a> = <span style="color: black; font-weight: bold;">FALSE</span><span style="color: #339933;">,</span> na = <span style="color: black; font-weight: bold;">FALSE</span><span style="color: #339933;">,</span> <br /> cutpoints = <a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">0.001</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">0.01</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">0.05</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">0.1</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">1</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> <a href="http://inside-r.org/r-doc/graphics/symbols"><span style="color: #003399; font-weight: bold;">symbols</span></a> = <a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;<em>**&quot;</span><span style="color: #339933;">,</span> <span style="color: blue;">&quot;</em><em>&quot;</span><span style="color: #339933;">,</span> <span style="color: blue;">&quot;</em>&quot;</span><span style="color: #339933;">,</span> <span style="color: blue;">&quot;.&quot;</span><span 
style="color: #339933;">,</span> <span style="color: blue;">&quot; &quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span> <br />&nbsp;<br /> <a href="http://inside-r.org/r-doc/graphics/text"><span style="color: #003399; font-weight: bold;">text</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">0.5</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">0.5</span><span style="color: #339933;">,</span> txt<span style="color: #339933;">,</span> cex = cex * r<span style="color: #009900;">)</span> <br /> <a href="http://inside-r.org/r-doc/graphics/text"><span style="color: #003399; font-weight: bold;">text</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">.8</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">.8</span><span style="color: #339933;">,</span> Signif<span style="color: #339933;">,</span> cex=cex<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/base/col"><span style="color: #003399; font-weight: bold;">col</span></a>=<span style="color: #cc66cc;">2</span><span style="color: #009900;">)</span> <br /><span style="color: #009900;">}</span><br />&nbsp;</pre><pre class="r geshifilter-R" style="font-family: monospace;"><a href="http://inside-r.org/r-doc/graphics/pairs"><span style="color: #003399; font-weight: bold;">pairs</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #339933;">,</span> lower.panel=<a href="http://inside-r.org/r-doc/graphics/panel.smooth"><span style="color: #003399; font-weight: bold;">panel.smooth</span></a><span style="color: #339933;">,</span> upper.panel=panel.cor<span style="color: #009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><div class="separator" style="clear: both; text-align: center;"><a href="https://lh3.googleusercontent.com/-df9_9d84Qdg/TXJ9s9OZQgI/AAAAAAAAEaU/zHwWtTQZFSw/s1600/panel.cor.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="https://lh3.googleusercontent.com/-df9_9d84Qdg/TXJ9s9OZQgI/AAAAAAAAEaU/zHwWtTQZFSw/s400/panel.cor.png" width="400" /></a></div><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br /><br />A comparison of run times...<br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">&gt; <a href="http://inside-r.org/r-doc/base/system.time"><span style="color: #003399; font-weight: bold;">system.time</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/graphics/pairs"><span style="color: #003399; font-weight: bold;">pairs</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /> user <a 
href="http://inside-r.org/r-doc/base/system"><span style="color: #003399; font-weight: bold;">system</span></a> elapsed <br /> <span style="color: #cc66cc;">0.138</span> <span style="color: #cc66cc;">0.008</span> <span style="color: #cc66cc;">0.156</span> <br />&gt; <a href="http://inside-r.org/r-doc/base/system.time"><span style="color: #003399; font-weight: bold;">system.time</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/lattice/splom"><span style="color: #003399; font-weight: bold;">splom</span></a><span style="color: #009900;">(</span>~<a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /> user <a href="http://inside-r.org/r-doc/base/system"><span style="color: #003399; font-weight: bold;">system</span></a> elapsed <br /> <span style="color: #cc66cc;">0.003</span> <span style="color: #cc66cc;">0.000</span> <span style="color: #cc66cc;">0.003</span> <br />&gt; <a href="http://inside-r.org/r-doc/base/system.time"><span style="color: #003399; font-weight: bold;">system.time</span></a><span style="color: #009900;">(</span>plotmatrix<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /> user <a href="http://inside-r.org/r-doc/base/system"><span style="color: #003399; font-weight: bold;">system</span></a> elapsed <br /> <span style="color: #cc66cc;">0.052</span> <span style="color: #cc66cc;">0.000</span> <span style="color: #cc66cc;">0.052</span> <br />&gt; <a href="http://inside-r.org/r-doc/base/system.time"><span style="color: #003399; font-weight: bold;">system.time</span></a><span style="color: #009900;">(</span>ggcorplot<span style="color: #009900;">(</span><br />+ <a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a> = <a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #339933;">,</span><br /> var<em>text</em>size = <span style="color: #cc66cc;">5</span><span style="color: #339933;">,</span><br /> cor<em>text</em>limits = <a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">5</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">10</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br />&nbsp;<br /> user <a href="http://inside-r.org/r-doc/base/system"><span style="color: #003399; font-weight: bold;">system</span></a> elapsed <br /> <span style="color: #cc66cc;">0.130</span> <span style="color: #cc66cc;">0.001</span> <span style="color: #cc66cc;">0.131</span> <br />&gt; <a href="http://inside-r.org/r-doc/base/system.time"><span style="color: 
#003399; font-weight: bold;">system.time</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/graphics/pairs"><span style="color: #003399; font-weight: bold;">pairs</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #339933;">,</span> lower.panel=<a href="http://inside-r.org/r-doc/graphics/panel.smooth"><span style="color: #003399; font-weight: bold;">panel.smooth</span></a><span style="color: #339933;">,</span> upper.panel=panel.cor<span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /> user <a href="http://inside-r.org/r-doc/base/system"><span style="color: #003399; font-weight: bold;">system</span></a> elapsed <br /> <span style="color: #cc66cc;">0.170</span> <span style="color: #cc66cc;">0.011</span> <span style="color: #cc66cc;">0.200</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br />...shows that splom is the fastest method, with the method using the panel.cor function pulling up the rear.<br /><br /><br /><br />6) given by a reader in the comments (get her/his code <a href="http://handlesman.blogspot.com/2011/03/matrix-plot-with-confidence-intervals.html">here</a>). This one is nice as it gives 95% CI&#39;s for the correlation coefficients, AND histograms of each variable.<br /><br /><a href="https://lh5.googleusercontent.com/-zC6MFpoo5w8/TXO0aJt1UtI/AAAAAAAAEao/lMOOaniqYPU/s1600/pairs_anehandlesman.png" imageanchor="1" style="clear: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="https://lh5.googleusercontent.com/-zC6MFpoo5w8/TXO0aJt1UtI/AAAAAAAAEao/lMOOaniqYPU/s400/pairs_anehandlesman.png" style="cursor: move;" width="400" /></a><br /><br /><br />7)&nbsp;a reader in the comments suggested the scatterplotMatrix (spm can be used) function in the car package. 
This one has the advantage of plotting distributions of each variable, and providing fits to each data with confidence intervals.<br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">spm<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/datasets/iris"><span style="color: #003399; font-weight: bold;">iris</span></a><span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span></pre></div></div><br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="https://lh4.googleusercontent.com/-gV9rB3jhZUU/TXTY57Z0bwI/AAAAAAAAEas/GuEWgTGpVzk/s1600/car.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="320" src="https://lh4.googleusercontent.com/-gV9rB3jhZUU/TXTY57Z0bwI/AAAAAAAAEas/GuEWgTGpVzk/s400/car.png" width="400" /></a></div><br /><div class="separator" style="clear: both; text-align: center;"><a href="https://lh5.googleusercontent.com/-zC6MFpoo5w8/TXO0aJt1UtI/AAAAAAAAEao/lMOOaniqYPU/s1600/pairs_anehandlesman.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><br /></a><a href="https://lh5.googleusercontent.com/-zC6MFpoo5w8/TXO0aJt1UtI/AAAAAAAAEao/lMOOaniqYPU/s1600/pairs_anehandlesman.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><br /></a><a href="https://lh5.googleusercontent.com/-zC6MFpoo5w8/TXO0aJt1UtI/AAAAAAAAEao/lMOOaniqYPU/s1600/pairs_anehandlesman.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><br /></a><a href="https://lh5.googleusercontent.com/-zC6MFpoo5w8/TXO0aJt1UtI/AAAAAAAAEao/lMOOaniqYPU/s1600/pairs_anehandlesman.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><br /></a><a href="https://lh5.googleusercontent.com/-zC6MFpoo5w8/TXO0aJt1UtI/AAAAAAAAEao/lMOOaniqYPU/s1600/pairs_anehandlesman.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><br /></a><a href="https://lh5.googleusercontent.com/-zC6MFpoo5w8/TXO0aJt1UtI/AAAAAAAAEao/lMOOaniqYPU/s1600/pairs_anehandlesman.png" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><br /></a></div></p> @@ -15429,7 +15698,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Check out Phyloseminar.org - 2011-03-04T06:22:00-08:00 + 2011-03-04T15:22:00+01:00 http://recology.info//2011/03/check-out-phyloseminar-org <div>They have online seminars that you can join in on live, and watch later as recorded videos. Check it out at: <a href="http://phyloseminar.org/index.html">phyloseminar.org home</a></div> @@ -15437,9 +15706,9 @@ gggraph<span class="p">(</span>z<span class="p&quo RStudio - - 2011-02-28T18:21:00-08:00 - http://recology.info//2011/02/rstudio + + 2011-03-01T03:21:00+01:00 + http://recology.info//2011/03/rstudio <p><b><u>New thoughts</u></b>: After actually using it more, it is quite nice, but I have a couple of major issues.<br />1. The text editor is quite slow to scroll through.<br />2. ggplot2 graphics look bad, worse than if just running R alone.<br /><br /><a href="http://www.rstudio.org/">RStudio</a><br /><div><br /></div><div>Everyone seems to be excited about this...</div><div><br /></div><div>Is it any good? 
Seems great for folks just learning R, but perhaps less ideal for advanced R users?</div><div><br /></div></p> @@ -15447,7 +15716,7 @@ gggraph<span class="p">(</span>z<span class="p&quo R overtakes SAS in popularity - 2011-02-25T06:58:00-08:00 + 2011-02-25T15:58:00+01:00 http://recology.info//2011/02/r-overtakes-sas-in-popularity <p><a href="http://www.tiobe.com/index.php/content/paperinfo/tpci/index.html">TIOBE Software: Tiobe Index</a></p> @@ -15456,7 +15725,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Phenotypic selection analysis in R - 2011-02-24T07:43:00-08:00 + 2011-02-24T16:43:00+01:00 http://recology.info//2011/02/phenotypic-selection-analysis-in-r <p>I have up to recently always done my phenotypic selection analyses in SAS. I finally got some code I think works to do everything SAS would do. Feedback much appreciated!<br /><br /><br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"><span style="color: #666666; font-style: italic;">########################Selection analyses#############################</span><br /><a href="http://inside-r.org/r-doc/utils/install.packages"><span style="color: #003399; font-weight: bold;">install.packages</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;car&quot;</span><span style="color: #339933;">,</span><span style="color: blue;">&quot;reshape&quot;</span><span style="color: #339933;">,</span><span style="color: blue;">&quot;ggplot2&quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/require"><span style="color: #003399; font-weight: bold;">require</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/packages/cran/car">car</a><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/require"><span style="color: #003399; font-weight: bold;">require</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/stats/reshape"><span style="color: #003399; font-weight: bold;">reshape</span></a><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/require"><span style="color: #003399; font-weight: bold;">require</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/packages/cran/ggplot2">ggplot2</a><span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Create data set</span><br />dat &lt;- <a href="http://inside-r.org/r-doc/base/data.frame"><span style="color: #003399; font-weight: bold;">data.frame</span></a><span style="color: #009900;">(</span>plant = <a href="http://inside-r.org/r-doc/base/seq"><span style="color: #003399; font-weight: bold;">seq</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">100</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> trait1 = <a href="http://inside-r.org/r-doc/base/rep"><span style="color: #003399; font-weight: bold;">rep</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: 
#009900;">(</span><span style="color: #cc66cc;">0.1</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.15</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.2</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.21</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.25</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.3</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.5</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.6</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.8</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.9</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">3</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">4</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">10</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">11</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">12</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">13</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">14</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">15</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">16</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> each = <span style="color: #cc66cc;">5</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> trait2 = <a href="http://inside-r.org/r-doc/stats/runif"><span style="color: #003399; font-weight: bold;">runif</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">100</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> fitness = <a href="http://inside-r.org/r-doc/base/rep"><span style="color: #003399; font-weight: bold;">rep</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">5</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">10</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">20</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">50</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> each = <span style="color: #cc66cc;">20</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Make relative fitness column</span><br />dat_ &lt;- <a href="http://inside-r.org/r-doc/base/cbind"><span style="color: #003399; font-weight: bold;">cbind</span></a><span style="color: #009900;">(</span>dat<span style="color: #339933;">,</span> dat$fitness/mean<span style="color: #009900;">(</span>dat$fitness<span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/names"><span style="color: #003399; font-weight: bold;">names</span></a><span style="color: #009900;">(</span>dat<em><span style="color: #009900;">)</span><span style="color: #009900;">[</span><span 
style="color: #cc66cc;">5</span><span style="color: #009900;">]</span> &lt;- <span style="color: blue;">&quot;relfitness&quot;</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Standardize traits</span><br />dat</em> &lt;- <a href="http://inside-r.org/r-doc/base/cbind"><span style="color: #003399; font-weight: bold;">cbind</span></a><span style="color: #009900;">(</span>dat<em><span style="color: #009900;">[</span><span style="color: #339933;">,</span>-<a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">2</span>:<span style="color: #cc66cc;">3</span><span style="color: #009900;">)</span><span style="color: #009900;">]</span><span style="color: #339933;">,</span> rescaler<span style="color: #009900;">(</span>dat</em><span style="color: #009900;">[</span><span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">2</span>:<span style="color: #cc66cc;">3</span><span style="color: #009900;">)</span><span style="color: #009900;">]</span><span style="color: #339933;">,</span><span style="color: blue;">&quot;sd&quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;">####Selection differentials and correlations among traits, cor.prob uses function in functions.R file</span><br /><span style="color: #666666; font-style: italic;">############################################################################</span><br /><span style="color: #666666; font-style: italic;">####### Function for calculating correlation matrix, corrs below diagonal,</span><br /><span style="color: #666666; font-style: italic;">####### and P-values above diagonal</span><br /><span style="color: #666666; font-style: italic;">############################################################################</span><br />cor.prob &lt;- <a href="http://inside-r.org/r-doc/base/function"><span style="color: #003399; font-weight: bold;">function</span></a><span style="color: #009900;">(</span>X<span style="color: #339933;">,</span> dfr = <a href="http://inside-r.org/r-doc/base/nrow"><span style="color: #003399; font-weight: bold;">nrow</span></a><span style="color: #009900;">(</span>X<span style="color: #009900;">)</span> - <span style="color: #cc66cc;">2</span><span style="color: #009900;">)</span> <span style="color: #009900;">{</span><br /> R &lt;- <a href="http://inside-r.org/r-doc/stats/cor"><span style="color: #003399; font-weight: bold;">cor</span></a><span style="color: #009900;">(</span>X<span style="color: #009900;">)</span><br /> above &lt;- <a href="http://inside-r.org/r-doc/base/row"><span style="color: #003399; font-weight: bold;">row</span></a><span style="color: #009900;">(</span>R<span style="color: #009900;">)</span> &lt; <a href="http://inside-r.org/r-doc/base/col"><span style="color: #003399; font-weight: bold;">col</span></a><span style="color: #009900;">(</span>R<span style="color: #009900;">)</span><br /> r2 &lt;- R<span style="color: #009900;">[</span>above<span style="color: #009900;">]</span>^<span style="color: #cc66cc;">2</span><br /> Fstat &lt;- r2 * dfr / <span style="color: #009900;">(</span><span style="color: #cc66cc;">1</span> - r2<span style="color: #009900;">)</span><br /> R<span style="color: 
#009900;">[</span>above<span style="color: #009900;">]</span> &lt;- <span style="color: #cc66cc;">1</span> - <a href="http://inside-r.org/r-doc/stats/pf"><span style="color: #003399; font-weight: bold;">pf</span></a><span style="color: #009900;">(</span>Fstat<span style="color: #339933;">,</span> <span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span> dfr<span style="color: #009900;">)</span><br /> R<br /><span style="color: #009900;">}</span> <br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Get selection differentials and correlations among traits in one data frame</span><br />dat<em>seldiffs &lt;- <a href="http://inside-r.org/r-doc/stats/cov"><span style="color: #003399; font-weight: bold;">cov</span></a><span style="color: #009900;">(</span>dat</em><span style="color: #009900;">[</span><span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">3</span>:<span style="color: #cc66cc;">5</span><span style="color: #009900;">)</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># calculates sel&#39;n differentials using cov</span><br />dat<em>selcorrs &lt;- cor.prob<span style="color: #009900;">(</span>dat</em><span style="color: #009900;">[</span><span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">3</span>:<span style="color: #cc66cc;">5</span><span style="color: #009900;">)</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># use P-values above diagonal for significance of sel&#39;n differentials in dat<em>seldiffs</span><br />dat</em>seldiffs<em>selcorrs &lt;- <a href="http://inside-r.org/r-doc/base/data.frame"><span style="color: #003399; font-weight: bold;">data.frame</span></a><span style="color: #009900;">(</span>dat</em>seldiffs<span style="color: #339933;">,</span> dat<em>selcorrs<span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># combine the two</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;">##########################################################################</span><br /><span style="color: #666666; font-style: italic;">####Selection gradients</span><br />dat</em>selngrad &lt;- <a href="http://inside-r.org/r-doc/stats/lm"><span style="color: #003399; font-weight: bold;">lm</span></a><span style="color: #009900;">(</span>relfitness ~ trait1 * trait2<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a> = dat<em><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/summary"><span style="color: #003399; font-weight: bold;">summary</span></a><span style="color: #009900;">(</span>dat</em>selngrad<span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># where &quot;Estimate&quot; is our sel&#39;n gradient</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;">####Check assumptions</span><br /><a href="http://inside-r.org/r-doc/stats/shapiro.test"><span style="color: #003399; font-weight: bold;">shapiro.test</span></a><span style="color: 
#009900;">(</span>dat<em>selngrad$residuals<span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># normality, bummer, non-normal</span><br /><a href="http://inside-r.org/r-doc/graphics/hist"><span style="color: #003399; font-weight: bold;">hist</span></a><span style="color: #009900;">(</span>dat</em>selngrad$residuals<span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># plot residuals</span><br /><a href="http://inside-r.org/packages/cran/VIF">vif</a><span style="color: #009900;">(</span>dat<em>selngrad<span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># check variance inflation factors (need package car), everything looks fine</span><br /><a href="http://inside-r.org/r-doc/graphics/plot"><span style="color: #003399; font-weight: bold;">plot</span></a><span style="color: #009900;">(</span>dat</em>selngrad<span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># cycle through diagnostic plots</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;">############################################################################</span><br /><span style="color: #666666; font-style: italic;"># Plot data</span><br /><a href="http://inside-r.org/packages/cran/ggplot">ggplot</a><span style="color: #009900;">(</span>dat<em><span style="color: #339933;">,</span> aes<span style="color: #009900;">(</span>trait1<span style="color: #339933;">,</span> relfitness<span style="color: #009900;">)</span><span style="color: #009900;">)</span> +<br /> geom</em>point<span style="color: #009900;">(</span><span style="color: #009900;">)</span> +<br /> geom_smooth<span style="color: #009900;">(</span>method = <span style="color: blue;">&quot;lm&quot;</span><span style="color: #009900;">)</span> +<br /> labs<span style="color: #009900;">(</span>x=<span style="color: blue;">&quot;Trait 1&quot;</span><span style="color: #339933;">,</span>y=<span style="color: blue;">&quot;Relative fitness&quot;</span><span style="color: #009900;">)</span><br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;myplot.jpeg&quot;</span><span style="color: #009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><br />Plot of relative fitness vs. 
trait 1 standardized<br /><br /><div class="separator" style="clear: both; text-align: center;"><a href="http://2.bp.blogspot.com/-OVQl92LOmZY/TWZ8RW9lHlI/AAAAAAAAEaQ/MGB39Lyghig/s1600/myplot.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="400" src="http://2.bp.blogspot.com/-OVQl92LOmZY/TWZ8RW9lHlI/AAAAAAAAEaQ/MGB39Lyghig/s400/myplot.jpeg" width="400" /></a></div></p> @@ -15465,7 +15734,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Phylogenetic analysis with the phangorn package: an example - 2011-02-21T10:31:00-08:00 + 2011-02-21T19:31:00+01:00 http://recology.info//2011/02/phylogenetic-analysis-with-the-phangorn-package-an-example <p>The phangorn package is a relatively new package in R for the analysis and comparison of phylogenies.&nbsp;See <a href="http://bioinformatics.oxfordjournals.org/content/27/4/592.short">here</a> for the Bioinformatics paper and <a href="http://cran.r-project.org/web/packages/phangorn/index.html">here</a> for the package.&nbsp;Here is an example of using phangorn from getting sequences to making phylogenies and visualizing them:<br /><br /><u>Getting sequences from Genbank</u><br /><br /><br /><br /><u>Multiple alignment</u><br /><br /><br /><br /><u>Maximum likelihood tree reconstruction</u><br /><br /><br /><br /><u>Visualizing trees</u><br /><br /><br /><br /><u>Visualizing trees and traits</u><br />Make fake traits:<br /><br />Visualize them on trees:<br /><u><br /></u><br /><u><br /></u><br /><u><br /></u></p> @@ -15474,7 +15743,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Farmer's markets data - 2011-02-16T20:41:00-08:00 + 2011-02-17T05:41:00+01:00 http://recology.info//2011/02/farmer-s-markets-data <p>I combined USDA data on farmer&#39;s markets in the US with population census data to get an idea of the disparity in farmers markets by state, and then also expressed per capita.<br /><br />Download USDA data <a href="http://www.ams.usda.gov/AMSv1.0/getfile?dDocName=STELPRDC5087258&amp;acct=frmrdirmkt">here</a>. The formatted file I used below is <a href="http://schamber.files.wordpress.com/2011/02/farmmarkets.xls">here</a> (in excel format, although I read into R as csv file). The census data is read from url as below.<br /><br />California has a ton of absolute number of farmer&#39;s markets, but Vermont takes the cake by far with number of markets per capita. 
Iowa comes in a distant second behind Vermont in markets per capita.<br /><br /><br /><br />The code:<br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"><span style="color: #666666; font-style: italic;">######## Farmer&#39;s Markets #############</span><br /><a href="http://inside-r.org/r-doc/base/setwd"><span style="color: #003399; font-weight: bold;">setwd</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;/Mac/R<em>stuff/Blog</em>etc/USDAFarmersMarkets&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># Set to your working directory, this is where you want to call files from and write files to</span><br /><a href="http://inside-r.org/r-doc/utils/install.packages"><span style="color: #003399; font-weight: bold;">install.packages</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;ggplot2&quot;</span><span style="color: #339933;">,</span> <span style="color: blue;">&quot;RCurl&quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># install all packags required below</span><br /><a href="http://inside-r.org/r-doc/base/require"><span style="color: #003399; font-weight: bold;">require</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/packages/cran/ggplot2">ggplot2</a><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># plyr is libraried along with ggplot2, as ggplot2 uses plyr (as well as package reshape) functions</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># read market data</span><br />markets &lt;- <a href="http://inside-r.org/r-doc/utils/read.csv"><span style="color: #003399; font-weight: bold;">read.csv</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;farmmarkets.csv&quot;</span><span style="color: #009900;">)</span><br />markets$state &lt;- <a href="http://inside-r.org/r-doc/base/as.factor"><span style="color: #003399; font-weight: bold;">as.factor</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;Wyoming &quot;</span><span style="color: #339933;">,</span> <span style="color: blue;">&quot;Wyoming&quot;</span><span style="color: #339933;">,</span> markets$LocAddState<span style="color: #009900;">)</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># there was a typo for Wyoming</span><br />markets &lt;- <a href="http://inside-r.org/r-doc/stats/na.omit"><span style="color: #003399; font-weight: bold;">na.omit</span></a><span style="color: #009900;">(</span>markets<span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/utils/str"><span style="color: #003399; font-weight: bold;">str</span></a><span style="color: #009900;">(</span>markets<span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># read population census data</span><br />popcen &lt;- <a href="http://inside-r.org/r-doc/utils/read.csv"><span style="color: #003399; font-weight: bold;">read.csv</span></a><span 
style="color: #009900;">(</span><span style="color: blue;">&quot;http://www.census.gov/popest/national/files/NST<em>EST2009</em>ALLDATA.csv&quot;</span><span style="color: #009900;">)</span><br />popcen &lt;- popcen<span style="color: #009900;">[</span><span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">4</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">5</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">6</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">17</span><span style="color: #009900;">)</span><span style="color: #009900;">]</span><br /><a href="http://inside-r.org/r-doc/utils/str"><span style="color: #003399; font-weight: bold;">str</span></a><span style="color: #009900;">(</span>popcen<span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># summarize</span><br />markets_ &lt;- ddply<span style="color: #009900;">(</span>markets<span style="color: #339933;">,</span> .<span style="color: #009900;">(</span>state<span style="color: #009900;">)</span><span style="color: #339933;">,</span> summarise<span style="color: #339933;">,</span><br /> markets<em>n = <a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>LocAddState<span style="color: #009900;">)</span> <br /><span style="color: #009900;">)</span><br />&nbsp;<br />markets</em>pop_ &lt;- <a href="http://inside-r.org/r-doc/base/merge"><span style="color: #003399; font-weight: bold;">merge</span></a><span style="color: #009900;">(</span>markets<em><span style="color: #339933;">,</span> popcen<span style="color: #009900;">[</span><span style="color: #339933;">,</span>-<span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span><span style="color: #339933;">,</span> by.x = <span style="color: blue;">&quot;state&quot;</span><span style="color: #339933;">,</span> by.y = <span style="color: blue;">&quot;NAME&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># merge two data sets</span><br />markets</em>pop<em>$marketspercap &lt;- markets</em>pop<em>$markets</em>n/markets<em>pop</em>$POPESTIMATE2009 <span style="color: #666666; font-style: italic;"># create column of markets per capita</span><br />markets<em>pop</em>$markets<em>n</em>st &lt;- markets<em>pop</em>$markets<em>n/max<span style="color: #009900;">(</span>markets</em>pop<em>$markets</em>n<span style="color: #009900;">)</span><br />markets<em>pop</em>$marketspercap<em>st &lt;- markets</em>pop<em>$marketspercap/max<span style="color: #009900;">(</span>markets</em>pop<em>$marketspercap<span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># plot</span><br /><a href="http://inside-r.org/packages/cran/ggplot">ggplot</a><span style="color: #009900;">(</span>melt<span style="color: #009900;">(</span>markets</em>pop<em><span style="color: #009900;">[</span><span style="color: #339933;">,</span>-<a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">2</span>:<span style="color: #cc66cc;">5</span><span style="color: #009900;">)</span><span style="color: #009900;">]</span><span 
style="color: #009900;">)</span><span style="color: #339933;">,</span> aes<span style="color: #009900;">(</span>x = state<span style="color: #339933;">,</span> y = value<span style="color: #339933;">,</span> fill = variable<span style="color: #009900;">)</span><span style="color: #009900;">)</span> +<br /> geom</em>bar<span style="color: #009900;">(</span>position = <span style="color: blue;">&quot;dodge&quot;</span><span style="color: #009900;">)</span> +<br /> coord<em>flip<span style="color: #009900;">(</span><span style="color: #009900;">)</span><br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;fmarkets</em>barplot.jpeg&quot;</span><span style="color: #009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br />Note: the x-axis here is standardized value of number of markets (markets<em>n</em>st) and number of markets per capita (marketspercap<em>st).<br />&lt;a href=&quot;http://4.bp.blogspot.com/-ceVMLE6yfbk/TVyE31U6LTI/AAAAAAAAEaM/PM2LCHnLPMM/s1600/fmarkets</em>barplot.jpeg&quot; imageanchor=&quot;1&quot; style=&quot;clear: left; margin-bottom: 1em; margin-right: 1em;&quot;&gt;<img border="0" height="452" src="http://4.bp.blogspot.com/-ceVMLE6yfbk/TVyE31U6LTI/AAAAAAAAEaM/PM2LCHnLPMM/s640/fmarkets_barplot.jpeg" style="cursor: move;" width="640" /></a><br /><br /><br /><div style="overflow-x: auto; overflow-y: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span style="color: #666666; font-style: italic;"># maps</span><br />try<em>require<span style="color: #009900;">(</span><span style="color: blue;">&quot;maps&quot;</span><span style="color: #009900;">)</span><br />states &lt;- map</em>data<span style="color: #009900;">(</span><span style="color: blue;">&quot;state&quot;</span><span style="color: #009900;">)</span><br />markets<em>pop</em>$statelow &lt;- <a href="http://inside-r.org/r-doc/base/tolower"><span style="color: #003399; font-weight: bold;">tolower</span></a><span style="color: #009900;">(</span>markets<em>pop</em>$state<span style="color: #009900;">)</span><br />survey<em>sum</em>map &lt;- <a href="http://inside-r.org/r-doc/base/merge"><span style="color: #003399; font-weight: bold;">merge</span></a><span style="color: #009900;">(</span>states<span style="color: #339933;">,</span> markets<em>pop</em><span style="color: #339933;">,</span> by.x = <span style="color: blue;">&quot;region&quot;</span><span style="color: #339933;">,</span> by.y = <span style="color: blue;">&quot;statelow&quot;</span><span style="color: #009900;">)</span><br />survey<em>sum</em>map &lt;- survey<em>sum</em>map<span style="color: #009900;">[</span><a href="http://inside-r.org/r-doc/base/order"><span style="color: #003399; font-weight: bold;">order</span></a><span style="color: #009900;">(</span>survey<em>sum</em>map$order<span style="color: #009900;">)</span><span style="color: #339933;">,</span> <span style="color: #009900;">]</span><br /><a href="http://inside-r.org/r-doc/utils/str"><span style="color: #003399; font-weight: bold;">str</span></a><span style="color: #009900;">(</span>survey<em>sum</em>map<span style="color: #009900;">)</span><br />&nbsp;<br />qplot<span style="color: #009900;">(</span>long<span style="color: #339933;">,</span> lat<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/utils/data"><span 
style="color: #003399; font-weight: bold;">data</span></a> = survey<em>sum</em>map<span style="color: #339933;">,</span> group = group<span style="color: #339933;">,</span> fill = markets<em>n<span style="color: #339933;">,</span> geom = <span style="color: blue;">&quot;polygon&quot;</span><span style="color: #339933;">,</span> main = <span style="color: blue;">&quot;Total farmer&#39;s markets&quot;</span><span style="color: #009900;">)</span> + <br /> scale</em>fill<em>gradient<span style="color: #009900;">(</span>low=<span style="color: blue;">&quot;green&quot;</span><span style="color: #339933;">,</span> high=<span style="color: blue;">&quot;black&quot;</span><span style="color: #009900;">)</span><br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;fmarkets</em>map<em>green.jpeg&quot;</span><span style="color: #009900;">)</span> </pre></div></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a></div><div><br /></div><br /><br /><br />&lt;a href=&quot;http://2.bp.blogspot.com/-I-Hqg4GtJs0/TVyE3I7BmYI/AAAAAAAAEaI/xNqBq4BqemI/s1600/fmarkets</em>map<em>green.jpeg&quot; imageanchor=&quot;1&quot; style=&quot;clear: left; margin-bottom: 1em; margin-right: 1em;&quot;&gt;&lt;img border=&quot;0&quot; height=&quot;452&quot; src=&quot;http://2.bp.blogspot.com/-I-Hqg4GtJs0/TVyE3I7BmYI/AAAAAAAAEaI/xNqBq4BqemI/s640/fmarkets</em>map<em>green.jpeg&quot; style=&quot;cursor: move;&quot; width=&quot;640&quot; /&gt;</a><br /><br /><br /><div style="overflow-x: auto; overflow-y: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"></pre><pre class="r geshifilter-R" style="font-family: monospace; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"></pre><pre class="r geshifilter-R" style="font-family: monospace; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">qplot<span style="color: #009900;">(</span>long<span style="color: #339933;">,</span> lat<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a> = survey</em>sum<em>map<span style="color: #339933;">,</span> group = group<span style="color: #339933;">,</span> fill = marketspercap<span style="color: #339933;">,</span> geom = <span style="color: blue;">&quot;polygon&quot;</span><span style="color: #339933;">,</span> main = <span style="color: blue;">&quot;Farmer&#39;s markets per person&quot;</span><span style="color: #009900;">)</span> +<br /> scale</em>fill<em>gradient<span style="color: #009900;">(</span>low=<span style="color: blue;">&quot;green&quot;</span><span style="color: #339933;">,</span> high=<span style="color: blue;">&quot;black&quot;</span><span style="color: #009900;">)</span><br />&nbsp;<br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;fmarkerspercap</em>map<em>green.jpeg&quot;</span><span style="color: #009900;">)</span> </pre></div></div><div style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a></div><div><br /></div><br /><div class="separator" style="clear: both; text-align: center;">&lt;a 
href=&quot;http://4.bp.blogspot.com/-g8zapywmu7M/TVyE2qNzFnI/AAAAAAAAEaE/yrjk5txjFgo/s1600/fmarkerspercap</em>map<em>green.jpeg&quot; imageanchor=&quot;1&quot; style=&quot;clear: left; float: left; margin-bottom: 1em; margin-right: 1em;&quot;&gt;&lt;img border=&quot;0&quot; height=&quot;453&quot; src=&quot;http://4.bp.blogspot.com/-g8zapywmu7M/TVyE2qNzFnI/AAAAAAAAEaE/yrjk5txjFgo/s640/fmarkerspercap</em>map<em>green.jpeg&quot; width=&quot;640&quot; /&gt;</a></div><br /><div class="separator" style="clear: both; text-align: center;">&lt;a href=&quot;http://2.bp.blogspot.com/-I-Hqg4GtJs0/TVyE3I7BmYI/AAAAAAAAEaI/xNqBq4BqemI/s1600/fmarkets</em>map<em>green.jpeg&quot; imageanchor=&quot;1&quot; style=&quot;clear: left; float: left; margin-bottom: 1em; margin-right: 1em;&quot;&gt;<br /></a></div><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"></pre></div></div><div class="separator" style="clear: both; text-align: center;">&lt;a href=&quot;http://4.bp.blogspot.com/-ceVMLE6yfbk/TVyE31U6LTI/AAAAAAAAEaM/PM2LCHnLPMM/s1600/fmarkets</em>barplot.jpeg&quot; imageanchor=&quot;1&quot; style=&quot;clear: left; float: left; margin-bottom: 1em; margin-right: 1em;&quot;&gt;<br /></a></div></p> @@ -15483,7 +15752,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Troubling news for the teaching of evolution - 2011-02-09T06:20:00-08:00 + 2011-02-09T15:20:00+01:00 http://recology.info//2011/02/troubling-news-for-teaching-of <p>[UPDATE: i remade the maps in green, hope that helps...]<br /><br />A recent survey reported in <a href="http://www.sciencemag.org.silk.library.umass.edu/content/331/6016/404.full">Science</a>&nbsp;(&quot;Defeating Creationism in the Courtroom, but not in the Classroom&quot;) found that biology teachers in high school do not often accept the basis of their discipline, as do teachers in other disciplines, and thus may not teach evolution appropriately.&nbsp;Read more here:&nbsp;<a href="http://www.nytimes.com/2011/02/08/science/08creationism.html?emc=eta1">New York Times</a>.<br /><br />I took a little time to play with the data provided online along with the Science article. The data is available on the Science website along with the article, and the dataset I read into R is unchanged from the original. The states abbreviations file is <a href="http://schamber.files.wordpress.com/2011/02/states_abbreviations.xls">here</a>&nbsp;(as a .xls). Here goes:<br /><br />I only played with two survey questions: q1b (no. of hours ecology is taught per year), and q1d (no. of hours evolution is taught per year). I looked at ecology and evolution as this blog is about ecology and evolution. It seems that some states that teach a lot of ecology teach&nbsp;a lot of evolution, but I found no correlation between the two without extreme outliers. I&nbsp;couldn’t help but notice my home state, TX, is near the bottom of the list on both counts - go TX! 
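For the farmer's-markets post above, whose code came through this feed with its underscores rendered as em tags, here is a condensed sketch of the per-capita summary and plot; the file name, column names (LocAddState, NAME, POPESTIMATE2009), and census URL follow the post, while the plyr/ggplot2 calls are simplified from the original.

library(plyr)      # ddply()
library(ggplot2)

# read the USDA market data and the census population estimates (paths/URL as in the post)
markets <- read.csv("farmmarkets.csv")
popcen  <- read.csv("http://www.census.gov/popest/national/files/NST_EST2009_ALLDATA.csv")

# number of markets per state
markets_n <- ddply(markets, .(LocAddState), summarise,
                   markets_n = length(LocAddState))

# merge with population and compute markets per capita
dat <- merge(markets_n, popcen[, c("NAME", "POPESTIMATE2009")],
             by.x = "LocAddState", by.y = "NAME")
dat$marketspercap <- dat$markets_n / dat$POPESTIMATE2009

# dot plot of markets per capita by state (Vermont comes out on top)
ggplot(dat, aes(x = reorder(LocAddState, marketspercap), y = marketspercap)) +
  geom_point() +
  coord_flip() +
  labs(x = "State", y = "Farmer's markets per capita")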
The teaching of evolution on the map produced below is less predictable than I would have though just based on my assumptions about political will in each state.<br /><br /><div style="overflow: auto;"><div class="geshifilter"><div class="separator" style="clear: both; text-align: center;"><br /></div><pre class="r geshifilter-R" style="font-family: monospace;"><span style="color: #666666; font-style: italic;"># Analyses of Conditionality Data set of all variables, except for latitude, etc.</span><br /><a href="http://inside-r.org/r-doc/base/setwd"><span style="color: #003399; font-weight: bold;">setwd</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;/Mac/R<em>stuff/Blog</em>etc/EvolutionTeaching/&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># Set working directory</span><br /><a href="http://inside-r.org/r-doc/base/library"><span style="color: #003399; font-weight: bold;">library</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/packages/cran/ggplot2">ggplot2</a><span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># read in data, and prepare new columns</span><br /><a href="http://inside-r.org/r-doc/MASS/survey"><span style="color: #003399; font-weight: bold;">survey</span></a> &lt;- <a href="http://inside-r.org/r-doc/utils/read.csv"><span style="color: #003399; font-weight: bold;">read.csv</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;berkmandata.csv&quot;</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/utils/str"><span style="color: #003399; font-weight: bold;">str</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/MASS/survey"><span style="color: #003399; font-weight: bold;">survey</span></a><span style="color: #009900;">) # (I do realize that survey is a data object in the MASS package)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Assign actual hours to survey answers </span><br />ecol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/MASS/survey"><span style="color: #003399; font-weight: bold;">survey</span></a>$q1b<span style="color: #009900;">)</span><br />ecol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">2</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">1.5</span><span style="color: #339933;">,</span> ecol<span style="color: #009900;">)</span><br />ecol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">3</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">4</span><span style="color: #339933;">,</span> ecol<span style="color: #009900;">)</span><br />ecol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">4</span><span style="color: #339933;">,</span> 
<span style="color: #cc66cc;">8</span><span style="color: #339933;">,</span> ecol<span style="color: #009900;">)</span><br />ecol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">5</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">13</span><span style="color: #339933;">,</span> ecol<span style="color: #009900;">)</span><br />ecol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">6</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">18</span><span style="color: #339933;">,</span> ecol<span style="color: #009900;">)</span><br />ecol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">7</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">20</span><span style="color: #339933;">,</span> ecol<span style="color: #009900;">)</span><br />&nbsp;<br />evol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">0</span><span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/MASS/survey"><span style="color: #003399; font-weight: bold;">survey</span></a>$q1d<span style="color: #009900;">)</span><br />evol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">2</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">1.5</span><span style="color: #339933;">,</span> evol<span style="color: #009900;">)</span><br />evol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">3</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">4</span><span style="color: #339933;">,</span> evol<span style="color: #009900;">)</span><br />evol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">4</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">8</span><span style="color: #339933;">,</span> evol<span style="color: #009900;">)</span><br />evol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">5</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">13</span><span style="color: #339933;">,</span> evol<span style="color: #009900;">)</span><br />evol &lt;- <a href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">6</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">18</span><span style="color: #339933;">,</span> evol<span style="color: #009900;">)</span><br />evol &lt;- <a 
href="http://inside-r.org/r-doc/base/gsub"><span style="color: #003399; font-weight: bold;">gsub</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">7</span><span style="color: #339933;">,</span> <span style="color: #cc66cc;">20</span><span style="color: #339933;">,</span> evol<span style="color: #009900;">)</span><br />&nbsp;<br /><a href="http://inside-r.org/r-doc/MASS/survey"><span style="color: #003399; font-weight: bold;">survey</span></a>$ecol &lt;- <a href="http://inside-r.org/r-doc/base/as.numeric"><span style="color: #003399; font-weight: bold;">as.numeric</span></a><span style="color: #009900;">(</span>ecol<span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/MASS/survey"><span style="color: #003399; font-weight: bold;">survey</span></a>$evol &lt;- <a href="http://inside-r.org/r-doc/base/as.numeric"><span style="color: #003399; font-weight: bold;">as.numeric</span></a><span style="color: #009900;">(</span>evol<span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># ddply it</span><br />survey<em>sum &lt;- ddply<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/MASS/survey"><span style="color: #003399; font-weight: bold;">survey</span></a><span style="color: #339933;">,</span> .<span style="color: #009900;">(</span>st</em>posta<span style="color: #009900;">)</span><span style="color: #339933;">,</span> summarise<span style="color: #339933;">,</span><br /> mean<em>ecol</em>hrs = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">(</span>ecol<span style="color: #339933;">,</span> na.rm=T<span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> mean<em>evol</em>hrs = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">(</span>evol<span style="color: #339933;">,</span> na.rm=T<span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> se<em>ecol</em>hrs = <a href="http://inside-r.org/r-doc/stats/sd"><span style="color: #003399; font-weight: bold;">sd</span></a><span style="color: #009900;">(</span>ecol<span style="color: #339933;">,</span> na.rm=T<span style="color: #009900;">)</span>/sqrt<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>ecol<span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> se<em>evol</em>hrs = <a href="http://inside-r.org/r-doc/stats/sd"><span style="color: #003399; font-weight: bold;">sd</span></a><span style="color: #009900;">(</span>evol<span style="color: #339933;">,</span> na.rm=T<span style="color: #009900;">)</span>/sqrt<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>evol<span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> num<em>teachers = <a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>st</em>posta<span style="color: #009900;">)</span><br /><span style="color: #009900;">)</span><br />&nbsp;<br /><span 
style="color: #666666; font-style: italic;"># plotting</span><br />limits<em>ecol &lt;- aes<span style="color: #009900;">(</span>ymax = mean</em>ecol<em>hrs + se</em>ecol<em>hrs<span style="color: #339933;">,</span> ymin = mean</em>ecol<em>hrs - se</em>ecol<em>hrs<span style="color: #009900;">)</span><br />limits</em>evol &lt;- aes<span style="color: #009900;">(</span>ymax = mean<em>evol</em>hrs + se<em>evol</em>hrs<span style="color: #339933;">,</span> ymin = mean<em>evol</em>hrs - se<em>evol</em>hrs<span style="color: #009900;">)</span><br />&nbsp;<br /><a href="http://inside-r.org/packages/cran/ggplot">ggplot</a><span style="color: #009900;">(</span>survey<em>sum<span style="color: #339933;">,</span> aes<span style="color: #009900;">(</span>x = <a href="http://inside-r.org/r-doc/stats/reorder"><span style="color: #003399; font-weight: bold;">reorder</span></a><span style="color: #009900;">(</span>st</em>posta<span style="color: #339933;">,</span> mean<em>ecol</em>hrs<span style="color: #009900;">)</span><span style="color: #339933;">,</span> y = mean<em>ecol</em>hrs<span style="color: #009900;">)</span><span style="color: #009900;">)</span> +<br /> geom<em>point<span style="color: #009900;">(</span><span style="color: #009900;">)</span> +<br /> geom</em>errorbar<span style="color: #009900;">(</span>limits<em>ecol<span style="color: #009900;">)</span> +<br /> geom</em>text<span style="color: #009900;">(</span>aes<span style="color: #009900;">(</span>label = num<em>teachers<span style="color: #009900;">)</span><span style="color: #339933;">,</span> vjust = <span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span> hjust = -<span style="color: #cc66cc;">3</span><span style="color: #339933;">,</span> size = <span style="color: #cc66cc;">3</span><span style="color: #009900;">)</span> +<br /> coord</em>flip<span style="color: #009900;">(</span><span style="color: #009900;">)</span> +<br /> labs<span style="color: #009900;">(</span>x = <span style="color: blue;">&quot;State&quot;</span><span style="color: #339933;">,</span> y = <span style="color: blue;">&quot;Mean hours of ecology taught <span style="color: #000099; font-weight: bold;">\n</span> per year (+/- 1 se)&quot;</span><span style="color: #009900;">)</span><br /></pre><pre class="r geshifilter-R" style="font-family: monospace;"><span style="color: #009900;">####SMALL NUMBERS BY BARS ARE NUMBER OF TEACHERS THAT RESPONDED TO THE SURVEY</span></pre><pre class="r geshifilter-R" style="font-family: monospace;"></pre><pre class="r geshifilter-R" style="font-family: monospace;"><a href="http://1.bp.blogspot.com/_fANWq796z-w/TVKfu6zmnJI/AAAAAAAAEZw/b49TxhUjMmk/s1600/survey_ecol.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="640" src="http://1.bp.blogspot.com/_fANWq796z-w/TVKfu6zmnJI/AAAAAAAAEZw/b49TxhUjMmk/s640/survey_ecol.jpeg" width="392" /></a><span class="Apple-style-span" style="font-family: Times; white-space: normal;"></span></pre><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_fANWq796z-w/TVKfu6zmnJI/AAAAAAAAEZw/b49TxhUjMmk/s1600/survey_ecol.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><br /></a></div><div class="separator" style="clear: both; text-align: center;"><br /></div><pre class="r geshifilter-R" style="font-family: monospace;">&nbsp;<br /><a href="http://inside-r.org/packages/cran/ggplot">ggplot</a><span style="color: 
#009900;">(</span>survey<em>sum<span style="color: #339933;">,</span> aes<span style="color: #009900;">(</span>x = <a href="http://inside-r.org/r-doc/stats/reorder"><span style="color: #003399; font-weight: bold;">reorder</span></a><span style="color: #009900;">(</span>st</em>posta<span style="color: #339933;">,</span> mean<em>evol</em>hrs<span style="color: #009900;">)</span><span style="color: #339933;">,</span> y = mean<em>evol</em>hrs<span style="color: #009900;">)</span><span style="color: #009900;">)</span> +<br /> geom<em>point<span style="color: #009900;">(</span><span style="color: #009900;">)</span> +<br /> geom</em>errorbar<span style="color: #009900;">(</span>limits<em>evol<span style="color: #009900;">)</span> +<br /> geom</em>text<span style="color: #009900;">(</span>aes<span style="color: #009900;">(</span>label = num<em>teachers<span style="color: #009900;">)</span><span style="color: #339933;">,</span> vjust = <span style="color: #cc66cc;">1</span><span style="color: #339933;">,</span> hjust = -<span style="color: #cc66cc;">3</span><span style="color: #339933;">,</span> size = <span style="color: #cc66cc;">3</span><span style="color: #009900;">)</span> +<br /> coord</em>flip<span style="color: #009900;">(</span><span style="color: #009900;">)</span> +<br /> labs<span style="color: #009900;">(</span>x = <span style="color: blue;">&quot;State&quot;</span><span style="color: #339933;">,</span> y = <span style="color: blue;">&quot;Mean hours of evolution taught <span style="color: #000099; font-weight: bold;">\n</span> per year (+/- 1 se)&quot;</span><span style="color: #009900;">)</span><br />&nbsp;<span class="Apple-style-span" style="color: #009900;">####SMALL NUMBERS BY BARS ARE NUMBER OF TEACHERS THAT RESPONDED TO THE SURVEY</span><br /><br /><span class="Apple-style-span" style="font-family: Times; white-space: normal;"><a href="http://4.bp.blogspot.com/_fANWq796z-w/TVKfuQSN7sI/AAAAAAAAEZs/o1EIVgS7lkA/s1600/survey_evol.jpeg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"><img border="0" height="640" src="http://4.bp.blogspot.com/_fANWq796z-w/TVKfuQSN7sI/AAAAAAAAEZs/o1EIVgS7lkA/s640/survey_evol.jpeg" width="392" /></a></span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># map</span><br />try<em>require<span style="color: #009900;">(</span><span style="color: blue;">&quot;maps&quot;</span><span style="color: #009900;">)</span><br />states &lt;- map</em>data<span style="color: #009900;">(</span><span style="color: blue;">&quot;state&quot;</span><span style="color: #009900;">)</span><br />statenames &lt;- <a href="http://inside-r.org/r-doc/utils/read.csv"><span style="color: #003399; font-weight: bold;">read.csv</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;/Mac/R<em>stuff/Code/states</em>abbreviations.csv&quot;</span><span style="color: #009900;">)</span><br />survey<em>sum</em> &lt;- <a href="http://inside-r.org/r-doc/base/merge"><span style="color: #003399; font-weight: bold;">merge</span></a><span style="color: #009900;">(</span>survey<em>sum<span style="color: #339933;">,</span> statenames<span style="color: #339933;">,</span> by.x = <span style="color: blue;">&quot;st</em>posta&quot;</span><span style="color: #339933;">,</span> by.y = <span style="color: blue;">&quot;state<em>abbrev&quot;</span><span style="color: #009900;">)</span><br />survey</em>sum<em>map &lt;- <a href="http://inside-r.org/r-doc/base/merge"><span style="color: #003399; font-weight: bold;">merge</span></a><span style="color: 
#009900;">(</span>states<span style="color: #339933;">,</span> survey</em>sum<em><span style="color: #339933;">,</span> by.x = <span style="color: blue;">&quot;region&quot;</span><span style="color: #339933;">,</span> by.y = <span style="color: blue;">&quot;state&quot;</span><span style="color: #009900;">)</span><br />survey</em>sum<em>map &lt;- survey</em>sum<em>map<span style="color: #009900;">[</span><a href="http://inside-r.org/r-doc/base/order"><span style="color: #003399; font-weight: bold;">order</span></a><span style="color: #009900;">(</span>survey</em>sum<em>map$order<span style="color: #009900;">)</span><span style="color: #339933;">,</span> <span style="color: #009900;">]</span><br />&nbsp;<br /><span class="Apple-style-span" style="font-family: Times; white-space: normal;"><pre class="r geshifilter-R" style="font-family: monospace; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">qplot<span style="color: #009900;">(</span>long<span style="color: #339933;">,</span> lat<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a> = survey</em>sum<em>map<span style="color: #339933;">,</span> group = group<span style="color: #339933;">,</span> fill = mean</em>ecol<em>hrs<span style="color: #339933;">,</span> geom = <span style="color: blue;">&quot;polygon&quot;</span><span style="color: #009900;">)</span> + scale</em>fill<em>gradient<span style="color: #009900;">(</span>low=<span style="color: blue;">&quot;black&quot;</span><span style="color: #339933;">,</span> high=<span style="color: blue;">&quot;green&quot;</span><span style="color: #009900;">)</span></pre></span></pre><pre class="r geshifilter-R" style="font-family: monospace;"></pre><pre class="r geshifilter-R"><span style="color: #009900;"></span><span class="Apple-style-span" style="font-family: Times; white-space: normal;">&lt;a href=&quot;http://3.bp.blogspot.com/-cNO2YWHX0Hk/TVQP5B7VxmI/AAAAAAAAEZ8/GBYKNR5vUBs/s1600/survey</em>ecol<em>map</em>green.jpeg&quot; imageanchor=&quot;1&quot; style=&quot;clear: left; margin-bottom: 1em; margin-right: 1em;&quot;&gt;<img border="0" height="458" src="http://3.bp.blogspot.com/-cNO2YWHX0Hk/TVQP5B7VxmI/AAAAAAAAEZ8/GBYKNR5vUBs/s640/survey_ecol_map_green.jpeg" width="640" /></a></span><br /><span class="Apple-style-span"><br /></span></pre><div class="separator" style="clear: both; text-align: center;"><br /></div><pre class="r geshifilter-R">&nbsp;<br /><br /><span class="Apple-style-span" style="font-family: Times; white-space: normal;"><pre class="r geshifilter-R" style="font-family: monospace; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;">qplot<span style="color: #009900;">(</span>long<span style="color: #339933;">,</span> lat<span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a> = survey<em>sum</em>map<span style="color: #339933;">,</span> group = group<span style="color: #339933;">,</span> fill = mean<em>evol</em>hrs<span style="color: #339933;">,</span> geom = <span style="color: blue;">&quot;polygon&quot;</span><span style="color: #009900;">)</span> + scale<em>fill</em>gradient<span style="color: #009900;">(</span>low=<span style="color: blue;">&quot;black&quot;</span><span style="color: #339933;">,</span> high=<span style="color: blue;">&quot;green&quot;</span><span style="color: #009900;">)</span></pre></span></pre><pre class="r 
geshifilter-R"><span style="color: #009900; font-family: monospace;"><span class="Apple-style-span" style="color: black; font-family: Times; white-space: normal;"><a href="http://2.bp.blogspot.com/-eLaIU-xsE78/TVQP5ol2gBI/AAAAAAAAEaA/vmGvlFhLmfE/s1600/survey_evol_map_green.jpeg" imageanchor="1" style="clear: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="458" src="http://2.bp.blogspot.com/-eLaIU-xsE78/TVQP5ol2gBI/AAAAAAAAEaA/vmGvlFhLmfE/s640/survey_evol_map_green.jpeg" style="cursor: move;" width="640" /></a></span></span></pre><pre class="r geshifilter-R"><pre class="r geshifilter-R" style="margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"><span class="Apple-style-span" style="font-family: Times;"><span class="Apple-style-span" style="white-space: normal;"><br /></span></span></pre></pre><pre class="r geshifilter-R" style="font-family: monospace;"></pre><pre class="r geshifilter-R"><span class="Apple-style-span" style="font-family: Times;"><span class="Apple-style-span" style="white-space: normal;"><br /></span></span></pre><pre class="r geshifilter-R" style="font-family: monospace;"></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a></p> @@ -15492,7 +15761,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Plants are less sex deprived when next to closely related neighbors - 2011-02-01T20:59:00-08:00 + 2011-02-02T05:59:00+01:00 http://recology.info//2011/02/plants-are-less-sex-deprived-when-next-to-closely-related-neighbors <p>A new early online paper in <a href="http://www.amjbot.org/cgi/content/abstract/ajb.1000329v1">American Journal of Botany</a> by <a href="http://mysite.science.uottawa.ca/rsargent/">Risa Sargent</a> and colleagues suggests that plants are less sex deprived (pollen limited) in vernal pools that have more closely related plant species. Vernal pools are (at least in my experience) small (to quite large) depressions that fill up with water with winter rains, and dry out completely in the summer. Vernal pool adapted plants flower in rings down the pool as the water dries up. Aquatic invertebrates and some herps can last through the summer by burrowing in the soil.</p> @@ -15509,7 +15778,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Good riddance to Excel pivot tables - 2011-01-30T20:36:00-08:00 + 2011-01-31T05:36:00+01:00 http://recology.info//2011/01/good-riddance-to-excel-pivot-tables <p>Excel pivot tables have been how I have reorganized data...up until now. These are just a couple of examples why R is superior to Excel for reorganizing data:</p> @@ -15526,7 +15795,7 @@ gggraph<span class="p">(</span>z<span class="p&quo R and Google Visualization API: Fish harvests - 2011-01-17T19:04:00-08:00 + 2011-01-18T04:04:00+01:00 http://recology.info//2011/01/r-and-google-visualization-api-fish <p>I recently gathered fish harvest data from the U.S. National Oceanic and Atmospheric Administarion (NOAA), which I downloaded from <a href="http://infochimps.com/">Infochimps</a>. The data is fish harvest by weight and value, by species for 21 years, from 1985 to 2005.<br /><br />Here is a link to a google document of the data I used below. 
I had to do some minor pocessing in Excel first; thus the link to this data.<br />https://spreadsheets.google.com/ccc?key=0Aq6aW8n11tS<em>dFRySXQzYkppLXFaU2F5aC04d19ZS0E&amp;hl=en<br /><br />Get the original data from Infochimps here:<br />http://infochimps.com/datasets/domestic-fish-and-shellfish-catch-value-and-price-by-species-198<br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"><span class="Apple-style-span" style="color: #666666;"><i><br /></i></span></pre><pre class="r geshifilter-R" style="font-family: monospace;"><span class="Apple-style-span" style="color: #666666;"><i><br /></i></span></pre><pre class="r geshifilter-R" style="font-family: monospace;"><span class="Apple-style-span" style="color: #666666;"><i><br /></i></span></pre><pre class="r geshifilter-R" style="font-family: monospace;"><span style="color: #666666; font-style: italic;"><br /></span></pre><pre class="r geshifilter-R" style="font-family: monospace;"><span style="color: #666666; font-style: italic;">################# Fish harvest data ########################################</span><br /><a href="http://inside-r.org/r-doc/base/setwd"><span style="color: #003399; font-weight: bold;">setwd</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;/Mac/R</em>stuff/Blog<em>etc/Infochimps/Fishharvest&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># Set path</span><br /><a href="http://inside-r.org/r-doc/base/library"><span style="color: #003399; font-weight: bold;">library</span></a><span style="color: #009900;">(</span><a href="http://www.blogger.com/packages/ggplot2">ggplot2</a><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/library"><span style="color: #003399; font-weight: bold;">library</span></a><span style="color: #009900;">(</span>googleVis<span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/library"><span style="color: #003399; font-weight: bold;">library</span></a><span style="color: #009900;">(</span><a href="http://www.blogger.com/packages/Hmisc">Hmisc</a><span style="color: #009900;">)</span><br />&nbsp;<br />fish &lt;- <a href="http://inside-r.org/r-doc/utils/read.csv"><span style="color: #003399; font-weight: bold;">read.csv</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;fishharvest.csv&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># read data</span><br />fish2 &lt;- melt<span style="color: #009900;">(</span>fish<span style="color: #339933;">,</span>id=<span style="color: #cc66cc;">1</span>:<span style="color: #cc66cc;">3</span><span style="color: #339933;">,</span>measure=<span style="color: #cc66cc;">4</span>:<span style="color: #cc66cc;">24</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># melt table</span><br />year &lt;- <a href="http://inside-r.org/r-doc/base/rep"><span style="color: #003399; font-weight: bold;">rep</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">1985</span>:<span style="color: #cc66cc;">2005</span><span style="color: #339933;">,</span> each = <span style="color: #cc66cc;">117</span><span style="color: #009900;">)</span><br />fish2 &lt;- <a href="http://inside-r.org/r-doc/base/data.frame"><span style="color: #003399; font-weight: bold;">data.frame</span></a><span style="color: #009900;">(</span>fish2<span 
style="color: #339933;">,</span>year<span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># replace year with actual values</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Google visusalization API</span><br />fishdata &lt;- <a href="http://inside-r.org/r-doc/base/data.frame"><span style="color: #003399; font-weight: bold;">data.frame</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/subset"><span style="color: #003399; font-weight: bold;">subset</span></a><span style="color: #009900;">(</span>fish2<span style="color: #339933;">,</span>fish2$var == <span style="color: blue;">&quot;quantity</em>1000lbs&quot;</span><span style="color: #339933;">,</span>-<span style="color: #cc66cc;">4</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span>value<em>1000dollars=<a href="http://inside-r.org/r-doc/base/subset"><span style="color: #003399; font-weight: bold;">subset</span></a><span style="color: #009900;">(</span>fish2<span style="color: #339933;">,</span>fish2$var == <span style="color: blue;">&quot;value</em>1000dollars&quot;</span><span style="color: #339933;">,</span>-<span style="color: #cc66cc;">4</span><span style="color: #009900;">)</span><span style="color: #009900;">[</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/names"><span style="color: #003399; font-weight: bold;">names</span></a><span style="color: #009900;">(</span>fishdata<span style="color: #009900;">)</span><span style="color: #009900;">[</span><span style="color: #cc66cc;">4</span><span style="color: #009900;">]</span> &lt;- <span style="color: blue;">&quot;quantity<em>1000lbs&quot;</span><br />fishharvest &lt;- gvisMotionChart<span style="color: #009900;">(</span>fishdata<span style="color: #339933;">,</span> idvar=<span style="color: blue;">&quot;species&quot;</span><span style="color: #339933;">,</span> timevar=<span style="color: blue;">&quot;year&quot;</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/graphics/plot"><span style="color: #003399; font-weight: bold;">plot</span></a><span style="color: #009900;">(</span>fishharvest<span style="color: #009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><br /><script src="http://www.google.com/jsapi" type="text/javascript"></script><br /><script type="text/javascript">google.load(&quot;visualization&quot;, &quot;1&quot;, { packages:[&quot;motionchart&quot;] });google.setOnLoadCallback(drawChart);function drawChart() {var data = new google.visualization.DataTable();var datajson = [ [ &quot;Anchovies&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 14566, 2704 ],[ &quot;Bluefish&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 13743, 2363 ],[ &quot;Butterfish&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 10338, 3537 ],[ &quot;Cod, Atlantic&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 82823, 35140 ],[ &quot;Cod, Pacific&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;,1.2028e+05, 18556 ],[ &quot;Croaker&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 11088, 3658 ],[ &quot;Flounders&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;,1.9572e+05,1.2912e+05 
],[ &quot;Haddock&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 14416, 13545 ],[ &quot;Halibut&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 61032, 38376 ],[ &quot;Herring, sea; Atlantic &quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 57133, 2968 ],[ &quot;Herring, sea; Pacific&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;,1.4207e+05, 47025 ],[ &quot;Jack mackerel&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 20852, 1770 ],[ &quot;Mackerel, Chub&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 75453, 6324 ],[ &quot;Menhaden&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;,2.7394e+06,1.0068e+05 ],[ &quot;Mullet&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 21205, 5720 ],[ &quot;Ocean perch, Atlantic&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 9666, 3179 ],[ &quot;Ocean perch, Pacific&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 9034, 1757 ],[ &quot;Pollock, Atlantic&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 43477, 6978 ],[ &quot;Pollock, Alaska&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 92833, 5409 ],[ &quot;Rockfish&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 82109, 23107 ],[ &quot;Sablefish&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 63380, 28692 ],[ &quot;Salmon&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;,7.2695e+05,4.398e+05 ],[ &quot;Scup or porgy&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 15996, 9338 ],[ &quot;Sea trout, gray&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 16400, 7330 ],[ &quot;Shark, Dogfish&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 11563, 842 ],[ &quot;Snapper, red&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 5181, 10661 ],[ &quot;Swordfish&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 12258, 33191 ],[ &quot;Tuna&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 83054, 52515 ],[ &quot;Whiting (Atlantic, silver)&quot;,1985,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 44545, 8274 ],[ &quot;Whiting (Pacific, hake)&quot;,1985,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 16316, 792 ],[ &quot;Clams&quot;,1985,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;,1.5055e+05,1.2835e+05 ],[ &quot;Crabs&quot;,1985,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;,3.3763e+05,2.0304e+05 ],[ &quot;Lobsters: American&quot;,1985,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;, 46152,1.149e+05 ],[ &quot;Oysters&quot;,1985,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;, 44173, 70053 ],[ &quot;Scallops, Calico&quot;,1985,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;, 12513, 1281 ],[ &quot;Scallops, sea&quot;,1985,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;, 15829, 74562 ],[ &quot;Shrimp&quot;,1985,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;,3.3369e+05,4.7285e+05 ],[ &quot;Squid, Atlantic&quot;,1985,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;, 7157, 7256 ],[ &quot;Squid, Pacific&quot;,1985,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;, 22276, 4047 ],[ &quot;Anchovies&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 13436, 2523 ],[ &quot;Bluefish&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 13968, 2413 ],[ &quot;Butterfish&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 9880, 6576 ],[ &quot;Cod, 
Atlantic&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 61134, 36142 ],[ &quot;Cod, Pacific&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;,1.0444e+05, 11337 ],[ &quot;Croaker&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 12382, 3883 ],[ &quot;Flounders&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;,1.6905e+05,1.2459e+05 ],[ &quot;Haddock&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 10968, 10911 ],[ &quot;Halibut&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 77691, 82925 ],[ &quot;Herring, sea; Atlantic &quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 79381, 4314 ],[ &quot;Herring, sea; Pacific&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;,1.3076e+05, 44598 ],[ &quot;Jack mackerel&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 23672, 1775 ],[ &quot;Mackerel, Chub&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 85444, 6408 ],[ &quot;Menhaden&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;,2.3914e+06, 93762 ],[ &quot;Mullet&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 21646, 6043 ],[ &quot;Ocean perch, Atlantic&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 6576, 3160 ],[ &quot;Ocean perch, Pacific&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 15585, 3116 ],[ &quot;Pollock, Atlantic&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 54469, 14044 ],[ &quot;Pollock, Alaska&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;,1.3042e+05, 7167 ],[ &quot;Rockfish&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 92029, 28265 ],[ &quot;Sablefish&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 84868, 45879 ],[ &quot;Salmon&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;,6.5852e+05,4.9394e+05 ],[ &quot;Scup or porgy&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 16181, 9476 ],[ &quot;Sea trout, gray&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 20602, 7086 ],[ &quot;Shark, Dogfish&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 11114, 964 ],[ &quot;Snapper, red&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 4489, 9820 ],[ &quot;Swordfish&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 9671, 30508 ],[ &quot;Tuna&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 87811, 54575 ],[ &quot;Whiting (Atlantic, silver)&quot;,1986,&quot;fish&quot;,&quot;quantity<em>1000lbs&quot;, 39924, 8312 ],[ &quot;Whiting (Pacific, hake)&quot;,1986,&quot;fish&quot;,&quot;quantity</em>1000lbs&quot;, 25615, 1182 ],[ &quot;Clams&quot;,1986,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;,1.4539e+05,1.3491e+05 ],[ &quot;Crabs&quot;,1986,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;,3.5566e+05,2.7014e+05 ],[ &quot;Lobsters: American&quot;,1986,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;, 46053,1.2056e+05 ],[ &quot;Oysters&quot;,1986,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;, 40544, 78068 ],[ &quot;Scallops, Calico&quot;,1986,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;, 1616, 3087 ],[ &quot;Scallops, sea&quot;,1986,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;, 19992, 97415 ],[ &quot;Shrimp&quot;,1986,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;,4.0018e+05,6.6274e+05 ],[ &quot;Squid, Atlantic&quot;,1986,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;, 33145, 10279 ],[ &quot;Squid, 
[Embedded chart data from a regenerated _site/ page: HTML-escaped JSON rows of the form [ "species", year, "fish"/"shellfish", "quantity_1000lbs", quantity, value ] covering U.S. commercial fisheries landings by species for 1986-2005.]
American&quot;,2005,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;, 87550,4.1419e+05 ],[ &quot;Oysters&quot;,2005,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;, 33957,1.1061e+05 ],[ &quot;Scallops, Calico&quot;,2005,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;,null,null ],[ &quot;Scallops, sea&quot;,2005,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;, 56704,4.3352e+05 ],[ &quot;Shrimp&quot;,2005,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;,2.6112e+05,4.0651e+05 ],[ &quot;Squid, Atlantic&quot;,2005,&quot;shellfish&quot;,&quot;quantity<em>1000lbs&quot;, 63422, 37392 ],[ &quot;Squid, Pacific&quot;,2005,&quot;shellfish&quot;,&quot;quantity</em>1000lbs&quot;,1.2571e+05, 31670 ] ];data.addColumn(&#39;string&#39;,&#39;species&#39;);data.addColumn(&#39;number&#39;,&#39;year&#39;);data.addColumn(&#39;string&#39;,&#39;type&#39;);data.addColumn(&#39;string&#39;,&#39;var&#39;);data.addColumn(&#39;number&#39;,&#39;quantity<em>1000lbs&#39;);data.addColumn(&#39;number&#39;,&#39;value</em>1000dollars&#39;);data.addRows(datajson);var chart = new google.visualization.MotionChart( document.getElementById(&#39;MotionChart<em>2011-01-17-08-09-24&#39;));var options ={};options[&quot;width&quot;] = 600;options[&quot;height&quot;] = 500;chart.draw(data,options);}</script><br />&lt;div id=&quot;MotionChart</em>2011-01-17-08-09-24&quot; style=&quot;height: 500px; width: 600px;&quot;&gt;</div>Data: fishdata, Chart ID: MotionChart<em>2011-01-17-08-09-24<br /><br /><br />R version 2.12.1 (2010-12-16),<br /><a href="http://code.google.com/apis/visualization/terms.html"><br />Google Terms of Use</a><br /><br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;">fishdatagg2 &lt;- ddply<span style="color: #009900;">(</span>fish2<span style="color: #339933;">,</span>.<span style="color: #009900;">(</span>species<span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/stats/var"><span style="color: #003399; font-weight: bold;">var</span></a><span style="color: #009900;">)</span><span style="color: #339933;">,</span>summarise<span style="color: #339933;">,</span><br /> <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a> = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">(</span>value<span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> se = <a href="http://inside-r.org/r-doc/stats/sd"><span style="color: #003399; font-weight: bold;">sd</span></a><span style="color: #009900;">(</span>value<span style="color: #009900;">)</span>/sqrt<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>value<span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /><span style="color: #009900;">)</span><br />fishdatagg2 &lt;- <a href="http://inside-r.org/r-doc/base/subset"><span style="color: #003399; font-weight: bold;">subset</span></a><span style="color: #009900;">(</span>fishdatagg2<span style="color: #339933;">,</span>fishdatagg2$var %in% <a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;quantity</em>1000lbs&quot;</span><span style="color: 
#339933;">,</span><span style="color: blue;">&quot;value<em>1000dollars&quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br />limit3 &lt;- aes<span style="color: #009900;">(</span>ymax = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a> + se<span style="color: #339933;">,</span> ymin = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a> - se<span style="color: #009900;">)</span><br />bysppfgrid &lt;- <a href="http://www.blogger.com/packages/ggplot">ggplot</a><span style="color: #009900;">(</span>fishdatagg2<span style="color: #339933;">,</span>aes<span style="color: #009900;">(</span>x=<a href="http://inside-r.org/r-doc/stats/reorder"><span style="color: #003399; font-weight: bold;">reorder</span></a><span style="color: #009900;">(</span>species<span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/base/rank"><span style="color: #003399; font-weight: bold;">rank</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span>y=<a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #339933;">,</span>colour=species<span style="color: #009900;">)</span><span style="color: #009900;">)</span> + geom</em>point<span style="color: #009900;">(</span><span style="color: #009900;">)</span> + geom<em>errorbar<span style="color: #009900;">(</span>limit3<span style="color: #009900;">)</span> + facet</em>grid<span style="color: #009900;">(</span>. 
~ <a href="http://inside-r.org/r-doc/stats/var"><span style="color: #003399; font-weight: bold;">var</span></a><span style="color: #339933;">,</span> scales=<span style="color: blue;">&quot;free&quot;</span><span style="color: #009900;">)</span> + opts<span style="color: #009900;">(</span>legend.position=<span style="color: blue;">&quot;none&quot;</span><span style="color: #009900;">)</span> + coord<em>flip<span style="color: #009900;">(</span><span style="color: #009900;">)</span> + scale</em>y<em>continuous<span style="color: #009900;">(</span>trans=<span style="color: blue;">&quot;log&quot;</span><span style="color: #009900;">)</span><br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;bysppfgrid.jpeg&quot;</span><span style="color: #009900;">)</span></pre></div></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br /><br /><div class="separator" style="clear: both; text-align: center;">&lt;a href=&quot;http://2.bp.blogspot.com/</em>fANWq796z-w/TTRvw6n41xI/AAAAAAAAEYk/aaoDVQ<em>C8kk/s1600/bysppfgrid.jpeg&quot; imageanchor=&quot;1&quot; style=&quot;clear: left; float: left; margin-bottom: 1em; margin-right: 1em;&quot;&gt;&lt;img border=&quot;0&quot; height=&quot;640&quot; src=&quot;http://2.bp.blogspot.com/</em>fANWq796z-w/TTRvw6n41xI/AAAAAAAAEYk/aaoDVQ_C8kk/s640/bysppfgrid.jpeg&quot; width=&quot;500&quot; /&gt;</a></div></p> @@ -15535,7 +15804,7 @@ gggraph<span class="p">(</span>z<span class="p&quo R and Google Visualization API: Wikispeedia - 2011-01-17T17:48:00-08:00 + 2011-01-18T02:48:00+01:00 http://recology.info//2011/01/r-and-google-visualization-api-wikispeedia <p>Wikispeedia is a website trying to gather all speed limit signs on Earth. I recently created a Google Visualization for some of their data, specifically on speed limit signs that change speed throughout the day. Check it out <a href="http://groups.google.com/group/wikispeedia/browse_thread/thread/c9c712125a597b16">here</a>. Here is how to see and comment on what they are doing: <a href="http://www.wikispeedia.org/">website</a>, and <a href="http://groups.google.com/group/wikispeedia?lnk=">Google groups</a>.</p> @@ -15544,7 +15813,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Bipartite networks and R - 2011-01-14T07:05:00-08:00 + 2011-01-14T16:05:00+01:00 http://recology.info//2011/01/bipartite-networks-and-r <p>Earlier, I posted about <a href="http://r-ecology.blogspot.com/2011/01/ecological-networks-from-abundance.html">generating networks from abundance distributions that you specify</a>. If this post was interesting, check out Jeff Kilpatrick&#39;s website, where he provides code he produced in R and Octave to compare real bipartite networks to ones generated based on ecological variables measured in the field (in our case it was abundance, body size, and nectar production). We used that code for a paper we published]<a href="http://www.springerlink.com/content/1055615l6m74mp30/">paper</a>. Code was modified from code produced by <a href="http://www.cricyt.edu.ar/interactio/dvazquez/html/index_e.html">Diego P. 
 @@ -15553,7 +15822,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Just for fun: Recovery.gov data snooping - 2011-01-11T08:52:00-08:00 + 2011-01-11T17:52:00+01:00 http://recology.info//2011/01/just-for-fun-recoverygov-data-snooping <p><script async="true" src="http://pixel.propublica.org/pixel.js" type="text/javascript"></script><br /><br /><div class="separator" style="clear: both; text-align: center;"></div><a href="http://projects.propublica.org/docdollars/" style="margin-left: 1em; margin-right: 1em;"><img border="0" src="http://4.bp.blogspot.com/_fANWq796z-w/TS_ACipsARI/AAAAAAAAEYA/s0V0Qr5GY0g/s1600/data-by-propublica.jpg" style="cursor: move;" /></a><br /><br /><div class="separator" style="clear: both; text-align: left;">Okay, so this isn&#39;t ecology related at all, but I like exploring data sets. So here goes...</div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;">Propublica has some awesome data sets available at their website: http://www.propublica.org/tools/</div><div class="separator" style="clear: both; text-align: left;">I played around with their data set on Recovery.gov (see hyperlink below in code). Here are some figures:</div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;">Mean award amount, ranked by mean amount, and also categorized by number of grants received (&quot;nfund&quot;) by state (by size and color of point).&nbsp; Yes, there are 56 &quot;states&quot;, which includes things like the Northern Mariana Islands (MP). Notice that California got the largest number of awards, but the mean award size was relatively small. </div><div class="separator" style="clear: both; text-align: center;"><a href="http://1.bp.blogspot.com/_fANWq796z-w/TSuMsqigYNI/AAAAAAAAEXo/XIv_2rHJ_J8/s1600/awardbystate.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="640" src="http://1.bp.blogspot.com/_fANWq796z-w/TSuMsqigYNI/AAAAAAAAEXo/XIv_2rHJ_J8/s640/awardbystate.jpeg" width="572" /></a></div><div class="separator" style="clear: both; text-align: left;">Here is a figure by the government organization that made each award, showing mean award size (y-axis), number of awards (x-axis), and number of jobs created (numjobs = text size). Notice that the FCC (Federal Communications Commission) created nearly the most jobs despite not giving very large awards (although they did give a lot of awards). 
</div><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/_fANWq796z-w/TSyI0xFggbI/AAAAAAAAEX0/36J9f_n89tY/s1600/awardbyagency.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="640" src="http://3.bp.blogspot.com/_fANWq796z-w/TSyI0xFggbI/AAAAAAAAEX0/36J9f_n89tY/s640/awardbyagency.jpeg" width="609" /></a></div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;">Here is a figure of mean awards by state on a map of the US:</div><div class="separator" style="clear: both; text-align: center;"><a href="http://4.bp.blogspot.com/_fANWq796z-w/TSuf8AnHDHI/AAAAAAAAEXs/pGxY_7ej7e8/s1600/bystatemapmeans.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="537" src="http://4.bp.blogspot.com/_fANWq796z-w/TSuf8AnHDHI/AAAAAAAAEXs/pGxY_7ej7e8/s640/bystatemapmeans.jpeg" width="640" /></a></div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;">And by number of awards by state:</div><div class="separator" style="clear: both; text-align: center;"><a href="http://3.bp.blogspot.com/_fANWq796z-w/TSuf9SKo2KI/AAAAAAAAEXw/QxDiC-DbvFc/s1600/bystatemapnumber.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="537" src="http://3.bp.blogspot.com/_fANWq796z-w/TSuf9SKo2KI/AAAAAAAAEXw/QxDiC-DbvFc/s640/bystatemapnumber.jpeg" width="640" /></a></div><div class="separator" style="clear: both; text-align: left;"><br /></div><div class="separator" style="clear: both; text-align: left;"><br /></div><br /><br /><br />Here is the code:<br /><br /><div style="overflow: auto;"><div class="geshifilter"><pre class="r geshifilter-R" style="font-family: monospace;"><span style="color: #666666; font-style: italic;">################################################################################</span><br /><span style="color: #666666; font-style: italic;">#################### Propublica Recovery.gov data ####################</span><br /><span style="color: #666666; font-style: italic;">################################################################################</span><br /><a href="http://inside-r.org/r-doc/utils/install.packages"><span style="color: #003399; font-weight: bold;">install.packages</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;ggplot2&quot;</span><span style="color: #339933;">,</span><span style="color: blue;">&quot;maps&quot;</span><span style="color: #339933;">,</span><span style="color: blue;">&quot;stringr&quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/library"><span style="color: #003399; font-weight: bold;">library</span></a><span style="color: #009900;">(</span><a href="http://www.blogger.com/packages/ggplot2">ggplot2</a><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/library"><span 
style="color: #003399; font-weight: bold;">library</span></a><span style="color: #009900;">(</span><a href="http://www.blogger.com/packages/maps">maps</a><span style="color: #009900;">)</span> <br /><a href="http://inside-r.org/r-doc/base/library"><span style="color: #003399; font-weight: bold;">library</span></a><span style="color: #009900;">(</span><a href="http://www.blogger.com/packages/stringr">stringr</a><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/base/setwd"><span style="color: #003399; font-weight: bold;">setwd</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;/Mac/R<em>stuff/Blog</em>etc&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># Set working directory</span><br />theme<em>set<span style="color: #009900;">(</span>theme</em>bw<span style="color: #009900;">(</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Read propublica data from file (download from here: http://propublica.s3.amazonaws.com/assets/recoverygov/propublica-recoverygov-primary-2.xls</span><br />propubdat &lt;- <a href="http://inside-r.org/r-doc/utils/read.csv"><span style="color: #003399; font-weight: bold;">read.csv</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;propublica-recoverygov-primary-2.csv&quot;</span><span style="color: #009900;">)</span><br /><a href="http://inside-r.org/r-doc/utils/str"><span style="color: #003399; font-weight: bold;">str</span></a><span style="color: #009900;">(</span>propubdat<span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Summarize data</span><br />fundbystate &lt;- ddply<span style="color: #009900;">(</span>propubdat<span style="color: #339933;">,</span>.<span style="color: #009900;">(</span>prime<em>state<span style="color: #009900;">)</span><span style="color: #339933;">,</span>summarise<span style="color: #339933;">,</span><br /> meanfund = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">(</span>award</em>amount<span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> sefund = <a href="http://inside-r.org/r-doc/stats/sd"><span style="color: #003399; font-weight: bold;">sd</span></a><span style="color: #009900;">(</span>award<em>amount<span style="color: #009900;">)</span>/sqrt<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>award</em>amount<span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> nfund = <a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>award<em>amount<span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> numjobs = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">(</span>number</em>of<em>jobs<span style="color: #009900;">)</span><br /><span style="color: #009900;">)</span><br />&nbsp;<br />fundbyagency &lt;- ddply<span style="color: #009900;">(</span>propubdat<span style="color: #339933;">,</span>.<span style="color: 
#009900;">(</span>funding</em>agency<em>name<span style="color: #009900;">)</span><span style="color: #339933;">,</span>summarise<span style="color: #339933;">,</span><br /> meanfund = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">(</span>award</em>amount<span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> sefund = <a href="http://inside-r.org/r-doc/stats/sd"><span style="color: #003399; font-weight: bold;">sd</span></a><span style="color: #009900;">(</span>award<em>amount<span style="color: #009900;">)</span>/sqrt<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>award</em>amount<span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> nfund = <a href="http://inside-r.org/r-doc/base/length"><span style="color: #003399; font-weight: bold;">length</span></a><span style="color: #009900;">(</span>award<em>amount<span style="color: #009900;">)</span><span style="color: #339933;">,</span><br /> numjobs = <a href="http://inside-r.org/r-doc/base/mean"><span style="color: #003399; font-weight: bold;">mean</span></a><span style="color: #009900;">(</span>number</em>of<em>jobs<span style="color: #009900;">)</span><br /><span style="color: #009900;">)</span><br />&nbsp;<br />&nbsp;<br />fun1 &lt;- <a href="http://inside-r.org/r-doc/base/function"><span style="color: #003399; font-weight: bold;">function</span></a><span style="color: #009900;">(</span>a<span style="color: #009900;">)</span> <span style="color: #009900;">{</span>str</em>c<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/paste"><span style="color: #003399; font-weight: bold;">paste</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/stats/na.omit"><span style="color: #003399; font-weight: bold;">na.omit</span></a><span style="color: #009900;">(</span>str<em>extract<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/unlist"><span style="color: #003399; font-weight: bold;">unlist</span></a><span style="color: #009900;">(</span>str</em>split<span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/unlist"><span style="color: #003399; font-weight: bold;">unlist</span></a><span style="color: #009900;">(</span><a href="http://inside-r.org/r-doc/base/as.character"><span style="color: #003399; font-weight: bold;">as.character</span></a><span style="color: #009900;">(</span>a<span style="color: #009900;">[</span><span style="color: #cc66cc;">1</span><span style="color: #009900;">]</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> <span style="color: blue;">&quot; &quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> <span style="color: blue;">&quot;[A-Z]{1}&quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span> <a href="http://inside-r.org/r-doc/nlme/collapse"><span style="color: #003399; font-weight: bold;">collapse</span></a>=<span style="color: blue;">&quot;&quot;</span><span style="color: #009900;">)</span><span style="color: #009900;">}</span> <span style="color: 
#666666; font-style: italic;"># Fxn to make funding agency name abbreviations within ddply below</span><br />&nbsp;<br />fundbyagency2 &lt;- ddply<span style="color: #009900;">(</span>fundbyagency<span style="color: #339933;">,</span>.<span style="color: #009900;">(</span>funding<em>agency</em>name<span style="color: #009900;">)</span><span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/base/transform"><span style="color: #003399; font-weight: bold;">transform</span></a><span style="color: #339933;">,</span> <span style="color: #666666; font-style: italic;"># add to table funding agency name abbreviations</span><br /> agency<em>abbrev = fun1<span style="color: #009900;">(</span>funding</em>agency<em>name<span style="color: #009900;">)</span><br /><span style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Plot data, means and se&#39;s by state</span><br />limits &lt;- aes<span style="color: #009900;">(</span>ymax = meanfund + sefund<span style="color: #339933;">,</span> ymin = meanfund - sefund<span style="color: #009900;">)</span><br />dodge &lt;- position</em>dodge<span style="color: #009900;">(</span>width=<span style="color: #cc66cc;">0.6</span><span style="color: #009900;">)</span><br />awardbystate &lt;- <a href="http://www.blogger.com/packages/ggplot">ggplot</a><span style="color: #009900;">(</span>fundbystate<span style="color: #339933;">,</span>aes<span style="color: #009900;">(</span>x=<a href="http://inside-r.org/r-doc/stats/reorder"><span style="color: #003399; font-weight: bold;">reorder</span></a><span style="color: #009900;">(</span>prime<em>state<span style="color: #339933;">,</span>meanfund<span style="color: #009900;">)</span><span style="color: #339933;">,</span>y=meanfund<span style="color: #339933;">,</span>colour=nfund<span style="color: #009900;">)</span><span style="color: #009900;">)</span> + geom</em>point<span style="color: #009900;">(</span>aes<span style="color: #009900;">(</span>size=nfund<span style="color: #009900;">)</span><span style="color: #339933;">,</span>position=dodge<span style="color: #009900;">)</span> + coord<em>flip<span style="color: #009900;">(</span><span style="color: #009900;">)</span> + geom</em>errorbar<span style="color: #009900;">(</span>limits<span style="color: #339933;">,</span> width=<span style="color: #cc66cc;">0.2</span><span style="color: #339933;">,</span>position=dodge<span style="color: #009900;">)</span> + opts<span style="color: #009900;">(</span>panel.grid.major = theme<em>blank<span style="color: #009900;">(</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span>panel.grid.minor=theme</em>blank<span style="color: #009900;">(</span><span style="color: #009900;">)</span><span style="color: #339933;">,</span>legend.position=<a href="http://inside-r.org/r-doc/base/c"><span style="color: #003399; font-weight: bold;">c</span></a><span style="color: #009900;">(</span><span style="color: #cc66cc;">0.7</span><span style="color: #339933;">,</span><span style="color: #cc66cc;">0.2</span><span style="color: #009900;">)</span><span style="color: #009900;">)</span> + labs<span style="color: #009900;">(</span>x=<span style="color: blue;">&quot;State&quot;</span><span style="color: #339933;">,</span>y=<span style="color: blue;">&quot;Mean grant amount awarded +/- 1 s.e.&quot;</span><span style="color: #009900;">)</span><br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;awardbystate.jpeg&quot;</span><span 
style="color: #009900;">)</span><br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># Plot data, means and se&#39;s by funding agency</span><br />limits2 &lt;- aes<span style="color: #009900;">(</span>ymax = meanfund + sefund<span style="color: #339933;">,</span> ymin = meanfund - sefund<span style="color: #009900;">)</span><br />dodge &lt;- position<em>dodge<span style="color: #009900;">(</span>width=<span style="color: #cc66cc;">0.6</span><span style="color: #009900;">)</span><br />awardbyagency &lt;- <a href="http://www.blogger.com/packages/ggplot">ggplot</a><span style="color: #009900;">(</span>fundbyagency2<span style="color: #339933;">,</span>aes<span style="color: #009900;">(</span>y=<a href="http://inside-r.org/r-doc/base/log"><span style="color: #003399; font-weight: bold;">log</span></a><span style="color: #009900;">(</span>meanfund<span style="color: #009900;">)</span><span style="color: #339933;">,</span>x=<a href="http://inside-r.org/r-doc/base/log"><span style="color: #003399; font-weight: bold;">log</span></a><span style="color: #009900;">(</span>nfund<span style="color: #009900;">)</span><span style="color: #339933;">,</span>label=agency</em>abbrev<span style="color: #009900;">)</span><span style="color: #009900;">)</span> + geom<em>text<span style="color: #009900;">(</span>aes<span style="color: #009900;">(</span>size=numjobs<span style="color: #009900;">)</span><span style="color: #009900;">)</span> <br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;awardbyagency.jpeg&quot;</span><span style="color: #009900;">)</span><br />&nbsp;<br />&nbsp;<br /><span style="color: #666666; font-style: italic;"># On US map</span><br />fundbystate2 &lt;- <a href="http://inside-r.org/r-doc/utils/read.csv"><span style="color: #003399; font-weight: bold;">read.csv</span></a><span style="color: #009900;">(</span><span style="color: blue;">&quot;fundbystate.csv&quot;</span><span style="color: #009900;">)</span><br />&nbsp;<br />states &lt;- map</em>data<span style="color: #009900;">(</span><span style="color: blue;">&quot;state&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># get state geographic data from the maps package</span><br />recovmap &lt;- <a href="http://inside-r.org/r-doc/base/merge"><span style="color: #003399; font-weight: bold;">merge</span></a><span style="color: #009900;">(</span>states<span style="color: #339933;">,</span>fundbystate2<span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/base/by"><span style="color: #003399; font-weight: bold;">by</span></a>=<span style="color: blue;">&quot;region&quot;</span><span style="color: #009900;">)</span> <span style="color: #666666; font-style: italic;"># merage datasets</span><br />&nbsp;<br />qplot<span style="color: #009900;">(</span>long<span style="color: #339933;">,</span>lat<span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a>=recovmap<span style="color: #339933;">,</span>group=group<span style="color: #339933;">,</span>fill=meanfund<span style="color: #339933;">,</span>geom=<span style="color: blue;">&quot;polygon&quot;</span><span style="color: #009900;">)</span><br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;bystatemapmeans.jpeg&quot;</span><span style="color: #009900;">)</span><br />&nbsp;<br />qplot<span style="color: #009900;">(</span>long<span style="color: 
#339933;">,</span>lat<span style="color: #339933;">,</span><a href="http://inside-r.org/r-doc/utils/data"><span style="color: #003399; font-weight: bold;">data</span></a>=recovmap<span style="color: #339933;">,</span>group=group<span style="color: #339933;">,</span>fill=nfund<span style="color: #339933;">,</span>geom=<span style="color: blue;">&quot;polygon&quot;</span><span style="color: #009900;">)</span><br />ggsave<span style="color: #009900;">(</span><span style="color: blue;">&quot;bystatemapnumber.jpeg&quot;</span><span style="color: #009900;">)</span></pre></div></div><div class="separator" style="clear: both; text-align: center;"><br /></div><a href="http://www.inside-r.org/pretty-r" title="Created by Pretty R at inside-R.org">Created by Pretty R at inside-R.org</a><br /><br />And the text file fundbystate2 <a href="http://web.me.com/scott_c1/Scott_Chamberlain/Other_files/fundbystate.csv">here</a>. I had the make this file separately so I could get in the spelled out state names as they were not provided in the propublica dataset.<br /><br />Source and disclaimer:<br />Data provided by <a href="http://www.propublica.org/">Propublica</a>. Data may contain errors and/or omissions.</p> @@ -15562,7 +15831,7 @@ gggraph<span class="p">(</span>z<span class="p&quo R and Google Visualization API - 2011-01-08T17:33:00-08:00 + 2011-01-09T02:33:00+01:00 http://recology.info//2011/01/r-and-google-visualization-api <p>R interfaces with the powerful Google Visualization API with the package googleVis (see <a href="http://code.google.com/p/google-motion-charts-with-r/">here</a>). It&#39;s relatively easy to convert your graphics in R to interactive graphics to post on a web browser. And the graphics are quite nice, as seen below in a simple graph of some of my data collected from this summer on seed predation to <i>Helianthus annuus</i> seeds in Texas:<br /><br /><script src="http://www.google.com/jsapi" type="text/javascript"></script><br /><script type="text/javascript">google.load(&quot;visualization&quot;, &quot;1&quot;, { packages:[&quot;motionchart&quot;] });google.setOnLoadCallback(drawChart);function drawChart() {var data = new google.visualization.DataTable();var datajson = [ [ &quot;Site 1&quot;,new Date(2008,0,8),&quot;beall&quot;,&quot;far&quot;,2.7558, 119.7 ],[ &quot;Site 1&quot;,new Date(2009,0,8),&quot;beall&quot;,&quot;near&quot;,0.53571,128.83 ],[ &quot;Site 1&quot;,new Date(2010,0,8),&quot;mine field&quot;,&quot;near&quot;,0.36207,103.23 ],[ &quot;Site 2&quot;,new Date(2008,0,8),&quot;beall&quot;,&quot;near&quot;,0.051212,82.188 ],[ &quot;Site 2&quot;,new Date(2009,0,8),&quot;mine field&quot;,&quot;far&quot;, 0,45.664 ],[ &quot;Site 2&quot;,new Date(2010,0,8),&quot;mine field&quot;,&quot;near&quot;, 0,50.319 ],[ &quot;Site 3&quot;,new Date(2007,0,8),&quot;beall&quot;,&quot;far&quot;,0.30909,94.639 ],[ &quot;Site 3&quot;,new Date(2008,0,8),&quot;beall&quot;,&quot;near&quot;,0.02439,137.54 ],[ &quot;Site 3&quot;,new Date(2009,0,8),&quot;mine field&quot;,&quot;far&quot;,0.85294,97.058 ],[ &quot;Site 3&quot;,new Date(2010,0,8),&quot;mine field&quot;,&quot;near&quot;,0.050633,127.31 ],[ &quot;Site 4&quot;,new Date(2007,0,8),&quot;beall&quot;,&quot;far&quot;,0.34539,109.48 ],[ &quot;Site 4&quot;,new Date(2008,0,8),&quot;beall&quot;,&quot;near&quot;,0.27667,127.04 ],[ &quot;Site 4&quot;,new Date(2009,0,8),&quot;mine field&quot;,&quot;far&quot;,0.69652,138.99 ],[ &quot;Site 4&quot;,new Date(2010,0,8),&quot;mine field&quot;,&quot;near&quot;,0.16392,153.22 ],[ &quot;Site 5&quot;,new 
Date(2007,0,8),&quot;mine field&quot;,&quot;far&quot;,0.060811,124.68 ],[ &quot;Site 5&quot;,new Date(2008,0,8),&quot;mine field&quot;,&quot;near&quot;,0.12821, 103.6 ] ];data.addColumn(&#39;string&#39;,&#39;site2&#39;);data.addColumn(&#39;date&#39;,&#39;years&#39;);data.addColumn(&#39;string&#39;,&#39;seed<em>source&#39;);data.addColumn(&#39;string&#39;,&#39;near</em>far&#39;);data.addColumn(&#39;number&#39;,&#39;meanholes&#39;);data.addColumn(&#39;number&#39;,&#39;meanseeds&#39;);data.addRows(datajson);var chart = new google.visualization.MotionChart( document.getElementById(&#39;MotionChart<em>2011-01-08-19-31-57&#39;));var options ={};options[&quot;width&quot;] = 600;options[&quot;height&quot;] = 500;chart.draw(data,options);}</script><br />&lt;div id=&quot;MotionChart</em>2011-01-08-19-31-57&quot; style=&quot;height: 500px; width: 600px;&quot;&gt;</div>Data: data2, Chart ID: MotionChart_2011-01-08-19-31-57<br /><br /><br />R version 2.12.1 (2010-12-16),<br /><a href="http://code.google.com/apis/visualization/terms.html"><br />Google Terms of Use</a><br /><br /><br /><br /><br /><br />&lt;%@include file=&quot;../src/simpleFooter.rsp&quot;%&gt;</p> @@ -15571,7 +15840,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Ecological networks from abundance distributions - 2011-01-06T06:58:00-08:00 + 2011-01-06T15:58:00+01:00 http://recology.info//2011/01/ecological-networks-from-abundance <p>Another grad student and I tried recently to make a contribution to our understanding of the relationship between ecological network structure (e.g., nestedness) and community structure (e.g., evenness)...</p> @@ -15588,7 +15857,7 @@ gggraph<span class="p">(</span>z<span class="p&quo R-bloggers - 2011-01-05T06:11:00-08:00 + 2011-01-05T15:11:00+01:00 http://recology.info//2011/01/r-bloggers <p>Just a quick FYI note in case you haven&#39;t seen this site.</p> @@ -15599,7 +15868,7 @@ gggraph<span class="p">(</span>z<span class="p&quo New approach to analysis of phylogenetic community structure - 2011-01-05T05:54:00-08:00 + 2011-01-05T14:54:00+01:00 http://recology.info//2011/01/new-approach-to-analysis-of-phylogenetic-community-structure <p>Anthony Ives, of University of Wisconsin-Madison, and Matthew Helmus of the Xishuangbanna Tropical Botanical Garden, present a new statistical method for analyzing phylogenetic community structure in an early view paper in Ecological Monographs. See the abstract <a href="http://www.esajournals.org/doi/abs/10.1890/10-1264.1">here</a>. </p> @@ -15614,7 +15883,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Ngram ecological terms - 2010-12-29T18:43:00-08:00 + 2010-12-30T03:43:00+01:00 http://recology.info//2010/12/ngram-ecological-terms <p>The recent availability of google ngram data is a great source of data on language use. Here are some terms from ecology from 1890 to 2000 (from here: http://ngrams.googlelabs.com/). Note that the word &quot;ecology&quot; doesn&#39;t appear at all until about 1890. </p> @@ -15625,7 +15894,7 @@ gggraph<span class="p">(</span>z<span class="p&quo Phylogenetic meta-analysis in R using Phylometa - 2010-12-28T05:15:00-08:00 + 2010-12-28T14:15:00+01:00 http://recology.info//2010/12/phylogenetic-meta-analysis-in-r-using-phylometa <p>Here is some code to run Phylometa from R. Phylometa is a program that conducts phylogenetic meta-analyses. The great advantage of the approach below is that you can easily run Phylometa from R, and manipulate the output from Phylometa in R. 
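The post's actual code isn't shown in this excerpt, but the general pattern is just calling the Phylometa executable from R and capturing what it prints. A minimal sketch, assuming a hypothetical executable name, input file names, and argument order (check Phylometa's documentation for the real interface):

```r
# Sketch only: "Phylometa.exe" and the input files below are placeholders,
# not Phylometa's documented command-line interface.
out <- system2("Phylometa.exe",
               args = c("phylogeny.txt", "effect_sizes.txt"),
               stdout = TRUE)

# 'out' is a character vector of Phylometa's printed output,
# ready to be parsed and reshaped in R
head(out)
```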
<br /><br />Phylometa was created by Marc Lajeunesse at University of South Florida, and is described in his 2009 AmNat <a href="http://lajeunesse.myweb.usf.edu/publications.html">paper</a>. Phylometa can be downloaded free <a href="http://lajeunesse.myweb.usf.edu/publications.html">here</a>.</p> @@ -15640,7 +15909,7 @@ gggraph<span class="p">(</span>z<span class="p&quo A new blog about using R for ecology and evolution - 2010-12-27T09:42:00-08:00 + 2010-12-27T18:42:00+01:00 http://recology.info//2010/12/a-new-blog-about-using-r-for-ecology-and-evolution <p>I am starting this blog not because I am a seasoned code writer, but because I am learning how to use R specifically for ecology and evolution, and figured many others might have the same questions I have. If I find cool solutions I will post them here for all to view, criticize, improve, etc.</p> diff --git a/_site/feed.R.xml b/_site/feed.R.xml index 2554990a07..2d411e20df 100644 --- a/_site/feed.R.xml +++ b/_site/feed.R.xml @@ -5,6 +5,274 @@ Posts tagged as 'R' http://recology.info/ + + binomen - Tools for slicing and dicing taxonomic names + <p>The first version of <code>binomen</code> is now up on <a href="https://cran.r-project.org/package=binomen">CRAN</a>. It provides various taxonomic classes for defining a single taxon, multiple taxa, and a taxonomic data.frame. It is designed as a companion to <a href="https://github.com/ropensci/taxize">taxize</a>, where you can get taxonomic data on taxonomic names from the web.</p> +
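As a hedged aside on that companion workflow: fetching the underlying taxonomic data with `taxize` might look roughly like this (the `classification()` call is taxize's, and this snippet is not part of the binomen post itself):

```r
library(taxize)

# look up a classification on the web (ITIS here); the resulting
# rank/name pairs are the sort of thing binomen's classes organize
cls <- classification("Poa annua", db = "itis")
head(cls[[1]])
```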
+<p>The classes (S3):</p> + +<ul> +<li><code>taxon</code></li> +<li><code>taxonref</code></li> +<li><code>taxonrefs</code></li> +<li><code>binomial</code></li> +<li><code>grouping</code> (i.e., classification - used different term to avoid conflict with classification in <code>taxize</code>)</li> +</ul> + +<p>For example, the <code>binomial</code> class is defined by a genus, epithet, authority, and optional full species name and canonical version.</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">binomial<span class="p">(</span><span class="s">&quot;Poa&quot;</span><span class="p">,</span> <span class="s">&quot;annua&quot;</span><span class="p">,</span> authority<span class="o">=</span><span class="s">&quot;L.&quot;</span><span class="p">)</span> +</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r"><span class="o">&lt;</span>binomial<span class="o">&gt;</span> + genus<span class="o">:</span> Poa + epithet<span class="o">:</span> annua + canonical<span class="o">:</span> + species<span class="o">:</span> + authority<span class="o">:</span> L. +</code></pre></div> +<p>The package has a suite of functions to work on these taxonomic classes:</p> + +<ul> +<li><code>gethier()</code> - get hierarchy from a <code>taxon</code> class</li> +<li><code>scatter()</code> - make each row in a taxonomic data.frame (<code>taxondf</code>) a separate <code>taxon</code> object within a single <code>taxa</code> object</li> +<li><code>assemble()</code> - make a <code>taxa</code> object into a <code>taxondf</code> data.frame</li> +<li><code>pick()</code> - pick out one or more taxonomic groups</li> +<li><code>pop()</code> - pop out (drop) one or more taxonomic groups</li> +<li><code>span()</code> - pick a range between two taxonomic groups (inclusive)</li> +<li><code>strain()</code> - filter by taxonomic groups, like dplyr&#39;s filter</li> +<li><code>name()</code> - get the taxon name for each <code>taxonref</code> object</li> +<li><code>uri()</code> - get the reference uri for each <code>taxonref</code> object</li> +<li><code>rank()</code> - get the taxonomic rank for each <code>taxonref</code> object</li> +<li><code>id()</code> - get the reference id for each <code>taxonref</code> object</li> +</ul> + +<p>The approach in this package is, I suppose, sort of like <code>split-apply-combine</code> from <code>plyr</code>/<code>dplyr</code>, but aimed at making those operations easy with taxonomic names.</p> + +<h2>Install</h2> + +<p>For the examples below, you&#39;ll need the package installed and loaded:</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">install.packages<span class="p">(</span><span class="s">&quot;binomen&quot;</span><span class="p">)</span> +</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r"><span class="kn">library</span><span class="p">(</span><span class="s">&quot;binomen&quot;</span><span class="p">)</span> +</code></pre></div> +<h2>Make a taxon</h2> + +<p>Make a taxon object</p> +<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="p">(</span>obj <span class="o">&lt;-</span> make_taxon<span class="p">(</span>genus<span class="o">=</span><span class="s">&quot;Poa&quot;</span><span class="p">,</span> epithet<span class="o">=</span><span class="s">&quot;annua&quot;</span><span class="p">,</span> authority<span class="o">=</span><span class="s">&quot;L.&quot;</span><span class="p">,</span> + family<span class="o">=</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span> clazz<span class="o">=</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span> kingdom<span class="o">=</span><span class="s">&#39;Plantae&#39;</span><span class="p">,</span> variety<span class="o">=</span><span class="s">&#39;annua&#39;</span><span class="p">))</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>Index to various parts of the object</p> + +<p>The binomial</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>binomial +<span class="c1">#&gt; &lt;binomial&gt;</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; epithet: annua</span> +<span class="c1">#&gt; canonical: Poa annua</span> +<span 
class="c1">#&gt; species: Poa annua L.</span> +<span class="c1">#&gt; authority: L.</span> +</code></pre></div> +<p>The authority</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>binomial<span class="o">$</span>authority +<span class="c1">#&gt; [1] &quot;L.&quot;</span> +</code></pre></div> +<p>The classification</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>grouping +<span class="c1">#&gt; &lt;grouping&gt;</span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>The family</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>grouping<span class="o">$</span>family +<span class="c1">#&gt; &lt;taxonref&gt;</span> +<span class="c1">#&gt; rank: family</span> +<span class="c1">#&gt; name: Poaceae</span> +<span class="c1">#&gt; id: none</span> +<span class="c1">#&gt; uri: none</span> +</code></pre></div> +<h2>Subset taxon objects</h2> + +<p>Get one or more ranks via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> pick<span class="p">(</span>family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; family: Poaceae</span> +obj <span class="o">%&gt;%</span> pick<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +</code></pre></div> +<p>Drop one or more ranks via <code>pop()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> pop<span class="p">(</span>family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +obj <span class="o">%&gt;%</span> pop<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>Get a range of ranks via <code>span()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> span<span class="p">(</span>kingdom<span class="p">,</span> family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span 
class="c1">#&gt; family: Poaceae</span> +</code></pre></div> +<p>Extract classification as a <code>data.frame</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">gethier<span class="p">(</span>obj<span class="p">)</span> +<span class="c1">#&gt; rank name</span> +<span class="c1">#&gt; 1 kingdom Plantae</span> +<span class="c1">#&gt; 2 clazz Poales</span> +<span class="c1">#&gt; 3 family Poaceae</span> +<span class="c1">#&gt; 4 genus Poa</span> +<span class="c1">#&gt; 5 species Poa annua</span> +<span class="c1">#&gt; 6 variety annua</span> +</code></pre></div> +<h2>Taxonomic data.frame&#39;s</h2> + +<p>Make one</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df <span class="o">&lt;-</span> <span class="kt">data.frame</span><span class="p">(</span>order <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Asterales&#39;</span><span class="p">,</span><span class="s">&#39;Asterales&#39;</span><span class="p">,</span><span class="s">&#39;Fagales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">),</span> + family <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Asteraceae&#39;</span><span class="p">,</span><span class="s">&#39;Asteraceae&#39;</span><span class="p">,</span><span class="s">&#39;Fagaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">),</span> + genus <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Helianthus&#39;</span><span class="p">,</span><span class="s">&#39;Helianthus&#39;</span><span class="p">,</span><span class="s">&#39;Quercus&#39;</span><span class="p">,</span><span class="s">&#39;Poa&#39;</span><span class="p">,</span><span class="s">&#39;Festuca&#39;</span><span class="p">,</span><span class="s">&#39;Holodiscus&#39;</span><span class="p">),</span> + stringsAsFactors <span class="o">=</span> <span class="kc">FALSE</span><span class="p">)</span> +<span class="p">(</span>df2 <span class="o">&lt;-</span> taxon_df<span class="p">(</span>df<span class="p">))</span> +<span class="c1">#&gt; order family genus</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus</span> +</code></pre></div> +<p>Parse - get rank order via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span class="o">%&gt;%</span> pick<span class="p">(</span><span class="kp">order</span><span class="p">)</span> +<span class="c1">#&gt; order</span> +<span class="c1">#&gt; 1 Asterales</span> +<span class="c1">#&gt; 2 Asterales</span> +<span class="c1">#&gt; 3 Fagales</span> +<span class="c1">#&gt; 4 Poales</span> +<span class="c1">#&gt; 5 Poales</span> +<span class="c1">#&gt; 6 Poales</span> +</code></pre></div> +<p>get ranks order, family, and genus via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span 
class="o">%&gt;%</span> pick<span class="p">(</span><span class="kp">order</span><span class="p">,</span> family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; order family genus</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus</span> +</code></pre></div> +<p>get range of names via <code>span()</code>, from rank <code>X</code> to rank <code>Y</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span class="o">%&gt;%</span> span<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; family genus</span> +<span class="c1">#&gt; 1 Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poaceae Poa</span> +<span class="c1">#&gt; 5 Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poaceae Holodiscus</span> +</code></pre></div> +<p>Separate each row into a <code>taxon</code> class (many <code>taxon</code> objects are a <code>taxa</code> class)</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">scatter<span class="p">(</span>df2<span class="p">)</span> +<span class="c1">#&gt; [[1]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Helianthus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Asterales</span> +<span class="c1">#&gt; family: Asteraceae</span> +<span class="c1">#&gt; genus: Helianthus</span> +<span class="c1">#&gt; species: Helianthus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[2]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Helianthus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Asterales</span> +<span class="c1">#&gt; family: Asteraceae</span> +<span class="c1">#&gt; genus: Helianthus</span> +<span class="c1">#&gt; species: Helianthus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[3]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Quercus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Fagales</span> +<span class="c1">#&gt; family: Fagaceae</span> +<span class="c1">#&gt; genus: Quercus</span> +<span class="c1">#&gt; species: Quercus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[4]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[5]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Festuca none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Festuca</span> +<span class="c1">#&gt; species: Festuca none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; 
[[6]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Holodiscus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Holodiscus</span> +<span class="c1">#&gt; species: Holodiscus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; attr(,&quot;class&quot;)</span> +<span class="c1">#&gt; [1] &quot;taxa&quot;</span> +</code></pre></div> +<p>And you can re-assemble a data.frame from the output of <code>scatter()</code> with <code>assemble()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">out <span class="o">&lt;-</span> scatter<span class="p">(</span>df2<span class="p">)</span> +assemble<span class="p">(</span>out<span class="p">)</span> +<span class="c1">#&gt; order family genus species</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus Helianthus none</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus Helianthus none</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus Quercus none</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa Poa none</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca Festuca none</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus Holodiscus none</span> +</code></pre></div> + 2015-12-08 00:00:00 +0100 + http://recology.info//2015/12/binomen-taxonomy-tools/ + + Crossref programmatic clients <p>I gave two talks recently at the annual <a href="http://www.crossref.org/annualmeeting/agenda.html">Crossref meeting</a>, one of which was a somewhat technical overview of programmatic clients for Crossref APIs. Check out the talk <a href="https://crossref.wistia.com/medias/8rh0jm5eda">here</a>. I talked about the motivation for working with Crossref data by writing code/etc. rather than going the GUI route, then went over the various clients, with brief examples.</p> @@ -181,7 +449,7 @@ cr_cn<span class="p">(</span>dois<span class="o&qu <p>If you find any bugs, please do file an issue.</p> - 2015-11-30 00:00:00 -0800 + 2015-11-30 00:00:00 +0100 http://recology.info//2015/11/crossref-clients/ @@ -347,7 +615,7 @@ ggplot<span class="p">(</span>res_all<span class=" </code></pre></div> <p><img src="/public/img/2015-10-21-noaa-isd/unnamed-chunk-12-1.png" alt="img"> </p> - 2015-10-21 00:00:00 -0700 + 2015-10-21 00:00:00 +0200 http://recology.info//2015/10/noaa-isd/ @@ -461,7 +729,7 @@ ggplot<span class="p">(</span>res_all<span class=" <p>I&#39;m sure I missed things. Let me know.</p> - 2015-10-19 00:00:00 -0700 + 2015-10-19 00:00:00 +0200 http://recology.info//2015/10/open-source-metrics/ @@ -524,7 +792,7 @@ creating a Digital Ocean account, authenticating, and have many examples.</p& <p>Let us know what you think. We&#39;d love to hear about any problems, use cases, feature requests. </p> - 2015-10-02 00:00:00 -0700 + 2015-10-02 00:00:00 +0200 http://recology.info//2015/10/analogsea-cran/ @@ -670,7 +938,7 @@ creating a Digital Ocean account, authenticating, and have many examples.</p& <span class="c1">#&gt; .. 
...</span> <span class="c1">#&gt; Variables not shown: setName (chr)</span> </code></pre></div> - 2015-09-11 00:00:00 -0700 + 2015-09-11 00:00:00 +0200 http://recology.info//2015/09/oai-client/ @@ -1097,7 +1365,7 @@ ggplot<span class="p">(</span>df<span class="p&quo </code></pre></div> <p><img src="/public/img/2015-08-07-full-text/unnamed-chunk-23-1.png" alt="plot of chunk unnamed-chunk-23"> </p> - 2015-08-07 00:00:00 -0700 + 2015-08-07 00:00:00 +0200 http://recology.info//2015/08/full-text/ @@ -1613,7 +1881,7 @@ sp<span class="o">::</span>plot<span class="p" </code></pre></div> <p><img src="/public/img/2015-07-07-weather-data-with-rnoaa/unnamed-chunk-19-1.png" alt="plot of chunk unnamed-chunk-19"> </p> - 2015-07-07 00:00:00 -0700 + 2015-07-07 00:00:00 +0200 http://recology.info//2015/07/weather-data-with-rnoaa/ @@ -1912,7 +2180,7 @@ ggplot<span class="p">(</span>df<span class="p&quo </code></pre></div> <p><img src="/public/img/2015-06-24-rerddap/unnamed-chunk-19-1.png" alt="plot of chunk unnamed-chunk-19"> </p> - 2015-06-24 00:00:00 -0700 + 2015-06-24 00:00:00 +0200 http://recology.info//2015/06/rerddap/ @@ -2030,168 +2298,9 @@ leaflet<span class="p">(</span>data <span class="o </code></pre></div> <p><img src="/public/img/2015-06-08-idigbio-in-spocc/plot.png" alt="image"></p> - 2015-06-08 00:00:00 -0700 + 2015-06-08 00:00:00 +0200 http://recology.info//2015/06/idigbio-in-spocc/ - - openadds - open addresses client - <p><code>openadds</code> talks to <a href="http://openaddresses.io/">Openaddresses.io</a>. a run down of its things:</p> - -<h2>Install</h2> -<div class="highlight"><pre><code class="language-r" data-lang="r">devtools<span class="o">::</span>install_github<span class="p">(</span><span class="s">&quot;sckott/openadds&quot;</span><span class="p">)</span> -</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r"><span class="kn">library</span><span class="p">(</span><span class="s">&quot;openadds&quot;</span><span class="p">)</span> -</code></pre></div> -<h2>List datasets</h2> - -<p>Scrapes links to datasets from the openaddresses site</p> -<div class="highlight"><pre><code class="language-r" data-lang="r">dat <span class="o">&lt;-</span> oa_list<span class="p">()</span> -dat<span class="p">[</span><span class="m">2</span><span class="o">:</span><span class="m">6</span><span class="p">]</span> -<span class="c1">#&gt; [1] &quot;http://data.openaddresses.io.s3.amazonaws.com/20150511/au-tas-launceston.csv&quot; </span> -<span class="c1">#&gt; [2] &quot;http://s3.amazonaws.com/data.openaddresses.io/20141127/au-victoria.zip&quot; </span> -<span class="c1">#&gt; [3] &quot;http://data.openaddresses.io.s3.amazonaws.com/20150511/be-flanders.zip&quot; </span> -<span class="c1">#&gt; [4] &quot;http://data.openaddresses.io.s3.amazonaws.com/20150417/ca-ab-calgary.zip&quot; </span> -<span class="c1">#&gt; [5] &quot;http://data.openaddresses.io.s3.amazonaws.com/20150511/ca-ab-grande_prairie.zip&quot;</span> -</code></pre></div> -<h2>Search for datasets</h2> - -<p>Uses <code>oa_list()</code> internally, then searches through columns requested.</p> -<div class="highlight"><pre><code class="language-r" data-lang="r">oa_search<span class="p">(</span>country <span class="o">=</span> <span class="s">&quot;us&quot;</span><span class="p">,</span> state <span class="o">=</span> <span class="s">&quot;ca&quot;</span><span class="p">)</span> -<span class="c1">#&gt; Source: local data frame [68 x 5]</span> -<span class="c1">#&gt; </span> -<span class="c1">#&gt; country state 
city ext</span> -<span class="c1">#&gt; 1 us ca san_mateo_county .zip</span> -<span class="c1">#&gt; 2 us ca alameda_county .zip</span> -<span class="c1">#&gt; 3 us ca alameda_county .zip</span> -<span class="c1">#&gt; 4 us ca amador .zip</span> -<span class="c1">#&gt; 5 us ca amador .zip</span> -<span class="c1">#&gt; 6 us ca bakersfield .zip</span> -<span class="c1">#&gt; 7 us ca bakersfield .zip</span> -<span class="c1">#&gt; 8 us ca berkeley .zip</span> -<span class="c1">#&gt; 9 us ca berkeley .zip</span> -<span class="c1">#&gt; 10 us ca butte_county .zip</span> -<span class="c1">#&gt; .. ... ... ... ...</span> -<span class="c1">#&gt; Variables not shown: url (chr)</span> -</code></pre></div> -<h2>Get data</h2> - -<p>Passing in a URL</p> -<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="p">(</span>out1 <span class="o">&lt;-</span> oa_get<span class="p">(</span>dat<span class="p">[</span><span class="m">5</span><span class="p">]))</span> -<span class="c1">#&gt; &lt;Openaddresses data&gt; ~/.openadds/ca-ab-calgary.zip</span> -<span class="c1">#&gt; Dimensions [350962, 13]</span> -<span class="c1">#&gt; </span> -<span class="c1">#&gt; OBJECTID ADDRESS_TY ADDRESS STREET_NAM STREET_TYP</span> -<span class="c1">#&gt; 0 757023 Parcel 249 SAGE MEADOWS CI NW SAGE MEADOWS CI</span> -<span class="c1">#&gt; 1 757022 Parcel 2506 17 ST SE 17 ST</span> -<span class="c1">#&gt; 2 757021 Parcel 305 EVANSPARK GD NW EVANSPARK GD</span> -<span class="c1">#&gt; 3 757020 Parcel 321 EVANSPARK GD NW EVANSPARK GD</span> -<span class="c1">#&gt; 4 757019 Parcel 204 EVANSBROOKE LD NW EVANSBROOKE LD</span> -<span class="c1">#&gt; 5 757018 Parcel 200 EVANSBROOKE LD NW EVANSBROOKE LD</span> -<span class="c1">#&gt; 6 757017 Parcel 219 HIDDEN VALLEY LD NW HIDDEN VALLEY LD</span> -<span class="c1">#&gt; 7 757016 Parcel 211 HIDDEN VALLEY LD NW HIDDEN VALLEY LD</span> -<span class="c1">#&gt; 8 757015 Parcel 364 HIDDEN VALLEY LD NW HIDDEN VALLEY LD</span> -<span class="c1">#&gt; 9 757014 Parcel 348 HIDDEN VALLEY LD NW HIDDEN VALLEY LD</span> -<span class="c1">#&gt; .. ... ... ... ... 
...</span> -<span class="c1">#&gt; Variables not shown: STREET_QUA (fctr), HOUSE_NUMB (int), HOUSE_ALPH</span> -<span class="c1">#&gt; (fctr), SUITE_NUMB (int), SUITE_ALPH (fctr), LONGITUDE (dbl),</span> -<span class="c1">#&gt; LATITUDE (dbl), COMM_NAME (fctr)</span> -</code></pre></div> -<p>First getting URL for dataset through <code>as_openadd()</code>, then passing to <code>oa_get()</code></p> -<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="p">(</span>x <span class="o">&lt;-</span> as_openadd<span class="p">(</span><span class="s">&quot;us&quot;</span><span class="p">,</span> <span class="s">&quot;nm&quot;</span><span class="p">,</span> <span class="s">&quot;hidalgo&quot;</span><span class="p">))</span> -<span class="c1">#&gt; &lt;&lt;OpenAddreses&gt;&gt; </span> -<span class="c1">#&gt; &lt;&lt;country&gt;&gt; us</span> -<span class="c1">#&gt; &lt;&lt;state&gt;&gt; nm</span> -<span class="c1">#&gt; &lt;&lt;city&gt;&gt; hidalgo</span> -<span class="c1">#&gt; &lt;&lt;extension&gt;&gt; .csv</span> -</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r">oa_get<span class="p">(</span>x<span class="p">)</span> -<span class="c1">#&gt; &lt;Openaddresses data&gt; ~/.openadds/us-nm-hidalgo.csv</span> -<span class="c1">#&gt; Dimensions [170659, 37]</span> -<span class="c1">#&gt; </span> -<span class="c1">#&gt; OBJECTID Shape ADD_NUM ADD_SUF PRE_MOD PRE_DIR PRE_TYPE ST_NAME</span> -<span class="c1">#&gt; 1 1 NA 422 S 2ND</span> -<span class="c1">#&gt; 2 2 NA 1413 S 4TH</span> -<span class="c1">#&gt; 3 3 NA 412 E CHAMPION</span> -<span class="c1">#&gt; 4 4 NA 110 E SAMANO</span> -<span class="c1">#&gt; 5 5 NA 2608 W FREDDY GONZALEZ</span> -<span class="c1">#&gt; 6 6 NA 2604 W FREDDY GONZALEZ</span> -<span class="c1">#&gt; 7 7 NA 1123 W FAY</span> -<span class="c1">#&gt; 8 8 NA 417 S 2ND</span> -<span class="c1">#&gt; 9 9 NA 4551 E TEXAS</span> -<span class="c1">#&gt; 10 10 NA 810 DRIFTWOOD</span> -<span class="c1">#&gt; .. ... ... ... ... ... ... ... ...</span> -<span class="c1">#&gt; Variables not shown: ST_TYPE (chr), POS_DIR (chr), POS_MOD (chr), ESN</span> -<span class="c1">#&gt; (int), MSAG_COMM (chr), PARCEL_ID (chr), PLACE_TYPE (chr), LANDMARK</span> -<span class="c1">#&gt; (chr), BUILDING (chr), UNIT (chr), ROOM (chr), FLOOR (int), LOC_NOTES</span> -<span class="c1">#&gt; (chr), ST_ALIAS (chr), FULL_ADDR (chr), ZIP (chr), POSTAL_COM (chr),</span> -<span class="c1">#&gt; MUNICIPAL (chr), COUNTY (chr), STATE (chr), SOURCE (chr), REGION</span> -<span class="c1">#&gt; (chr), EXCH (chr), LAT (dbl), LONG (dbl), PICTURE (chr), OA:x (dbl),</span> -<span class="c1">#&gt; OA:y (dbl), OA:geom (chr)</span> -</code></pre></div> -<h2>Combine multiple datasets</h2> - -<p><code>combine</code> attemps to guess lat/long and address columns, but definitely more work to do to make -this work for most cases. 
Lat/long and address columns vary among every dataset - some datasets -have no lat/long data, some have no address data.</p> -<div class="highlight"><pre><code class="language-r" data-lang="r">out2 <span class="o">&lt;-</span> oa_get<span class="p">(</span>dat<span class="p">[</span><span class="m">32</span><span class="p">])</span> -<span class="p">(</span>alldat <span class="o">&lt;-</span> oa_combine<span class="p">(</span>out1<span class="p">,</span> out2<span class="p">))</span> -<span class="c1">#&gt; Source: local data frame [418,623 x 4]</span> -<span class="c1">#&gt; </span> -<span class="c1">#&gt; lon lat address dataset</span> -<span class="c1">#&gt; 1 -114.1303 51.17188 249 SAGE MEADOWS CI NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; 2 -114.0190 51.03168 2506 17 ST SE ca-ab-calgary.zip</span> -<span class="c1">#&gt; 3 -114.1175 51.17497 305 EVANSPARK GD NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; 4 -114.1175 51.17461 321 EVANSPARK GD NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; 5 -114.1212 51.16268 204 EVANSBROOKE LD NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; 6 -114.1213 51.16264 200 EVANSBROOKE LD NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; 7 -114.1107 51.14784 219 HIDDEN VALLEY LD NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; 8 -114.1108 51.14768 211 HIDDEN VALLEY LD NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; 9 -114.1121 51.14780 364 HIDDEN VALLEY LD NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; 10 -114.1117 51.14800 348 HIDDEN VALLEY LD NW ca-ab-calgary.zip</span> -<span class="c1">#&gt; .. ... ... ... ...</span> -</code></pre></div> -<h2>Map data</h2> - -<p>Get some data</p> -<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="p">(</span>out <span class="o">&lt;-</span> oa_get<span class="p">(</span>dat<span class="p">[</span><span class="m">400</span><span class="p">]))</span> -<span class="c1">#&gt; &lt;Openaddresses data&gt; ~/.openadds/us-ca-sonoma_county.zip</span> -<span class="c1">#&gt; Dimensions [217243, 5]</span> -<span class="c1">#&gt; </span> -<span class="c1">#&gt; LON LAT NUMBER STREET POSTCODE</span> -<span class="c1">#&gt; 1 -122.5327 38.29779 3771 A Cory Lane NA</span> -<span class="c1">#&gt; 2 -122.5422 38.30354 18752 White Oak Drive NA</span> -<span class="c1">#&gt; 3 -122.5412 38.30327 18749 White Oak Drive NA</span> -<span class="c1">#&gt; 4 -122.3997 38.26122 3552 Napa Road NA</span> -<span class="c1">#&gt; 5 -122.5425 38.30404 3998 White Oak Court NA</span> -<span class="c1">#&gt; 6 -122.5429 38.30434 4026 White Oak Court NA</span> -<span class="c1">#&gt; 7 -122.5430 38.30505 4039 White Oak Court NA</span> -<span class="c1">#&gt; 8 -122.5417 38.30504 4017 White Oak Court NA</span> -<span class="c1">#&gt; 9 -122.5409 38.30436 18702 White Oak Drive NA</span> -<span class="c1">#&gt; 10 -122.5403 38.30392 18684 White Oak Drive NA</span> -<span class="c1">#&gt; .. ... ... ... ... 
...</span> -</code></pre></div> -<p>Make an interactive map (not all data)</p> -<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="kn">library</span><span class="p">(</span><span class="s">&quot;leaflet&quot;</span><span class="p">)</span> - -x <span class="o">&lt;-</span> oa_get<span class="p">(</span>oa_search<span class="p">(</span>country <span class="o">=</span> <span class="s">&quot;us&quot;</span><span class="p">,</span> city <span class="o">=</span> <span class="s">&quot;boulder&quot;</span><span class="p">)[</span><span class="m">1</span><span class="p">,]</span><span class="o">$</span><span class="kp">url</span><span class="p">)</span> -y <span class="o">&lt;-</span> oa_get<span class="p">(</span>oa_search<span class="p">(</span>country <span class="o">=</span> <span class="s">&quot;us&quot;</span><span class="p">,</span> city <span class="o">=</span> <span class="s">&quot;gunnison&quot;</span><span class="p">)[</span><span class="m">1</span><span class="p">,]</span><span class="o">$</span><span class="kp">url</span><span class="p">)</span> -oa_combine<span class="p">(</span>x<span class="p">,</span> y<span class="p">)</span> <span class="o">%&gt;%</span> - leaflet<span class="p">()</span> <span class="o">%&gt;%</span> - addTiles<span class="p">()</span> <span class="o">%&gt;%</span> - addCircles<span class="p">(</span>lat <span class="o">=</span> <span class="o">~</span>lat<span class="p">,</span> lng <span class="o">=</span> <span class="o">~</span>lon<span class="p">,</span> popup <span class="o">=</span> <span class="o">~</span>address<span class="p">)</span> -</code></pre></div> -<p><img src="/public/img/2015-05-18-openadds/map.png" alt="image"></p> - -<h2>To do</h2> - -<ul> -<li>Surely there are many datasets that won&#39;t work in <code>oa_combine()</code> - gotta go through many more.</li> -<li>An easy viz function wrapping <code>leaflet</code></li> -<li>Since you can get a lot of spatial data quickly, easy way to visualize big data, maybe marker clusters?</li> -</ul> - - 2015-05-18 00:00:00 -0700 - http://recology.info//2015/05/openadds/ - - diff --git a/_site/feed.xml b/_site/feed.xml index b0fd499fe0..7f194fa570 100644 --- a/_site/feed.xml +++ b/_site/feed.xml @@ -4,6 +4,274 @@ http://recology.info/ + + binomen - Tools for slicing and dicing taxonomic names + <p>The first version of <code>binomen</code> is now up on [CRAN][binomecran]. It provides various taxonomic classes for defining a single taxon, multiple taxa, and a taxonomic data.frame. 
It is designed as a companion to <a href="https://github.com/ropensci/taxize">taxize</a>, where you can get taxonomic data on taxonomic names from the web.</p> + +<p>The classes (S3):</p> + +<ul> +<li><code>taxon</code></li> +<li><code>taxonref</code></li> +<li><code>taxonrefs</code></li> +<li><code>binomial</code></li> +<li><code>grouping</code> (i.e., classification - used different term to avoid conflict with classification in <code>taxize</code>)</li> +</ul> + +<p>For example, the <code>binomial</code> class is defined by a genus, epithet, authority, and optional full species name and canonical version.</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">binomial<span class="p">(</span><span class="s">&quot;Poa&quot;</span><span class="p">,</span> <span class="s">&quot;annua&quot;</span><span class="p">,</span> authority<span class="o">=</span><span class="s">&quot;L.&quot;</span><span class="p">)</span> +</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r"><span class="o">&lt;</span>binomial<span class="o">&gt;</span> + genus<span class="o">:</span> Poa + epithet<span class="o">:</span> annua + canonical<span class="o">:</span> + species<span class="o">:</span> + authority<span class="o">:</span> L. +</code></pre></div> +<p>The package has a suite of functions to work on these taxonomic classes:</p> + +<ul> +<li><code>gethier()</code> - get hierarchy from a <code>taxon</code> class</li> +<li><code>scatter()</code> - make each row in taxonomic data.frame (<code>taxondf</code>) a separate <code>taxon</code> object within a single <code>taxa</code> object</li> +<li><code>assemble()</code> - make a <code>taxa</code> object into a <code>taxondf</code> data.frame</li> +<li><code>pick()</code> - pick out one or more taxonomic groups</li> +<li><code>pop()</code> - pop out (drop) one or more taxonomic groups</li> +<li><code>span()</code> - pick a range between two taxonomic groups (inclusive)</li> +<li><code>strain()</code> - filter by taxonomic groups, like dplyr&#39;s filter</li> +<li><code>name()</code> - get the taxon name for each <code>taxonref</code> object</li> +<li><code>uri()</code> - get the reference uri for each <code>taxonref</code> object</li> +<li><code>rank()</code> - get the taxonomic rank for each <code>taxonref</code> object</li> +<li><code>id()</code> - get the reference uri for each <code>taxonref</code> object</li> +</ul> + +<p>The approach in this package I suppose is sort of like <code>split-apply-combine</code> from <code>plyr</code>/<code>dplyr</code>, whereas this is aims to make it easy to do with taxonomic names.</p> + +<h2>Install</h2> + +<p>For examples below, you&#39;ll need the development version:</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">install.packages<span class="p">(</span><span class="s">&quot;binomen&quot;</span><span class="p">)</span> +</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r"><span class="kn">library</span><span class="p">(</span><span class="s">&quot;binomen&quot;</span><span class="p">)</span> +</code></pre></div> +<h2>Make a taxon</h2> + +<p>Make a taxon object</p> +<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="p">(</span>obj <span class="o">&lt;-</span> make_taxon<span class="p">(</span>genus<span class="o">=</span><span class="s">&quot;Poa&quot;</span><span class="p">,</span> epithet<span class="o">=</span><span class="s">&quot;annua&quot;</span><span class="p">,</span> authority<span 
class="o">=</span><span class="s">&quot;L.&quot;</span><span class="p">,</span> + family<span class="o">=</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span> clazz<span class="o">=</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span> kingdom<span class="o">=</span><span class="s">&#39;Plantae&#39;</span><span class="p">,</span> variety<span class="o">=</span><span class="s">&#39;annua&#39;</span><span class="p">))</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>Index to various parts of the object</p> + +<p>The binomial</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>binomial +<span class="c1">#&gt; &lt;binomial&gt;</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; epithet: annua</span> +<span class="c1">#&gt; canonical: Poa annua</span> +<span class="c1">#&gt; species: Poa annua L.</span> +<span class="c1">#&gt; authority: L.</span> +</code></pre></div> +<p>The authority</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>binomial<span class="o">$</span>authority +<span class="c1">#&gt; [1] &quot;L.&quot;</span> +</code></pre></div> +<p>The classification</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>grouping +<span class="c1">#&gt; &lt;grouping&gt;</span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>The family</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj<span class="o">$</span>grouping<span class="o">$</span>family +<span class="c1">#&gt; &lt;taxonref&gt;</span> +<span class="c1">#&gt; rank: family</span> +<span class="c1">#&gt; name: Poaceae</span> +<span class="c1">#&gt; id: none</span> +<span class="c1">#&gt; uri: none</span> +</code></pre></div> +<h2>Subset taxon objects</h2> + +<p>Get one or more ranks via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> pick<span class="p">(</span>family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; family: Poaceae</span> +obj <span class="o">%&gt;%</span> pick<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +</code></pre></div> +<p>Drop one or more ranks via <code>pop()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> pop<span class="p">(</span>family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span 
class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +obj <span class="o">%&gt;%</span> pop<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; species: Poa annua</span> +<span class="c1">#&gt; variety: annua</span> +</code></pre></div> +<p>Get a range of ranks via <code>span()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">obj <span class="o">%&gt;%</span> span<span class="p">(</span>kingdom<span class="p">,</span> family<span class="p">)</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa annua</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; kingdom: Plantae</span> +<span class="c1">#&gt; clazz: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +</code></pre></div> +<p>Extract classification as a <code>data.frame</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">gethier<span class="p">(</span>obj<span class="p">)</span> +<span class="c1">#&gt; rank name</span> +<span class="c1">#&gt; 1 kingdom Plantae</span> +<span class="c1">#&gt; 2 clazz Poales</span> +<span class="c1">#&gt; 3 family Poaceae</span> +<span class="c1">#&gt; 4 genus Poa</span> +<span class="c1">#&gt; 5 species Poa annua</span> +<span class="c1">#&gt; 6 variety annua</span> +</code></pre></div> +<h2>Taxonomic data.frame&#39;s</h2> + +<p>Make one</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df <span class="o">&lt;-</span> <span class="kt">data.frame</span><span class="p">(</span>order <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Asterales&#39;</span><span class="p">,</span><span class="s">&#39;Asterales&#39;</span><span class="p">,</span><span class="s">&#39;Fagales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">,</span><span class="s">&#39;Poales&#39;</span><span class="p">),</span> + family <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Asteraceae&#39;</span><span class="p">,</span><span class="s">&#39;Asteraceae&#39;</span><span class="p">,</span><span class="s">&#39;Fagaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">,</span><span class="s">&#39;Poaceae&#39;</span><span class="p">),</span> + genus <span class="o">=</span> <span class="kt">c</span><span class="p">(</span><span class="s">&#39;Helianthus&#39;</span><span class="p">,</span><span class="s">&#39;Helianthus&#39;</span><span class="p">,</span><span class="s">&#39;Quercus&#39;</span><span class="p">,</span><span class="s">&#39;Poa&#39;</span><span class="p">,</span><span class="s">&#39;Festuca&#39;</span><span class="p">,</span><span class="s">&#39;Holodiscus&#39;</span><span class="p">),</span> + stringsAsFactors <span class="o">=</span> <span 
class="kc">FALSE</span><span class="p">)</span> +<span class="p">(</span>df2 <span class="o">&lt;-</span> taxon_df<span class="p">(</span>df<span class="p">))</span> +<span class="c1">#&gt; order family genus</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus</span> +</code></pre></div> +<p>Parse - get rank order via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span class="o">%&gt;%</span> pick<span class="p">(</span><span class="kp">order</span><span class="p">)</span> +<span class="c1">#&gt; order</span> +<span class="c1">#&gt; 1 Asterales</span> +<span class="c1">#&gt; 2 Asterales</span> +<span class="c1">#&gt; 3 Fagales</span> +<span class="c1">#&gt; 4 Poales</span> +<span class="c1">#&gt; 5 Poales</span> +<span class="c1">#&gt; 6 Poales</span> +</code></pre></div> +<p>get ranks order, family, and genus via <code>pick()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span class="o">%&gt;%</span> pick<span class="p">(</span><span class="kp">order</span><span class="p">,</span> family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; order family genus</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus</span> +</code></pre></div> +<p>get range of names via <code>span()</code>, from rank <code>X</code> to rank <code>Y</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">df2 <span class="o">%&gt;%</span> span<span class="p">(</span>family<span class="p">,</span> genus<span class="p">)</span> +<span class="c1">#&gt; family genus</span> +<span class="c1">#&gt; 1 Asteraceae Helianthus</span> +<span class="c1">#&gt; 2 Asteraceae Helianthus</span> +<span class="c1">#&gt; 3 Fagaceae Quercus</span> +<span class="c1">#&gt; 4 Poaceae Poa</span> +<span class="c1">#&gt; 5 Poaceae Festuca</span> +<span class="c1">#&gt; 6 Poaceae Holodiscus</span> +</code></pre></div> +<p>Separate each row into a <code>taxon</code> class (many <code>taxon</code> objects are a <code>taxa</code> class)</p> +<div class="highlight"><pre><code class="language-r" data-lang="r">scatter<span class="p">(</span>df2<span class="p">)</span> +<span class="c1">#&gt; [[1]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Helianthus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Asterales</span> +<span class="c1">#&gt; family: Asteraceae</span> +<span class="c1">#&gt; genus: Helianthus</span> +<span class="c1">#&gt; species: Helianthus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[2]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Helianthus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Asterales</span> +<span class="c1">#&gt; family: Asteraceae</span> +<span class="c1">#&gt; genus: Helianthus</span> +<span class="c1">#&gt; 
species: Helianthus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[3]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Quercus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Fagales</span> +<span class="c1">#&gt; family: Fagaceae</span> +<span class="c1">#&gt; genus: Quercus</span> +<span class="c1">#&gt; species: Quercus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[4]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Poa none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Poa</span> +<span class="c1">#&gt; species: Poa none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[5]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Festuca none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Festuca</span> +<span class="c1">#&gt; species: Festuca none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; [[6]]</span> +<span class="c1">#&gt; &lt;taxon&gt;</span> +<span class="c1">#&gt; binomial: Holodiscus none</span> +<span class="c1">#&gt; grouping: </span> +<span class="c1">#&gt; order: Poales</span> +<span class="c1">#&gt; family: Poaceae</span> +<span class="c1">#&gt; genus: Holodiscus</span> +<span class="c1">#&gt; species: Holodiscus none</span> +<span class="c1">#&gt; </span> +<span class="c1">#&gt; attr(,&quot;class&quot;)</span> +<span class="c1">#&gt; [1] &quot;taxa&quot;</span> +</code></pre></div> +<p>And you can re-assemble a data.frame from the output of <code>scatter()</code> with <code>assemble()</code></p> +<div class="highlight"><pre><code class="language-r" data-lang="r">out <span class="o">&lt;-</span> scatter<span class="p">(</span>df2<span class="p">)</span> +assemble<span class="p">(</span>out<span class="p">)</span> +<span class="c1">#&gt; order family genus species</span> +<span class="c1">#&gt; 1 Asterales Asteraceae Helianthus Helianthus none</span> +<span class="c1">#&gt; 2 Asterales Asteraceae Helianthus Helianthus none</span> +<span class="c1">#&gt; 3 Fagales Fagaceae Quercus Quercus none</span> +<span class="c1">#&gt; 4 Poales Poaceae Poa Poa none</span> +<span class="c1">#&gt; 5 Poales Poaceae Festuca Festuca none</span> +<span class="c1">#&gt; 6 Poales Poaceae Holodiscus Holodiscus none</span> +</code></pre></div> + 2015-12-08 00:00:00 +0100 + http://recology.info//2015/12/binomen-taxonomy-tools/ + + Crossref programmatic clients <p>I gave two talks recently at the annual <a href="http://www.crossref.org/annualmeeting/agenda.html">Crossref meeting</a>, one of which was a somewhat technical overview of programmatic clients for Crossref APIs. Check out the talk <a href="https://crossref.wistia.com/medias/8rh0jm5eda">here</a>. I talked about the motivation for working with Crossref data by writing code/etc. 
rather than going the GUI route, then went over the various clients, with brief examples.</p> @@ -180,7 +448,7 @@ cr_cn<span class="p">(</span>dois<span class="o&qu <p>If you find any bugs, please do file an issue.</p> - 2015-11-30 00:00:00 -0800 + 2015-11-30 00:00:00 +0100 http://recology.info//2015/11/crossref-clients/ @@ -315,7 +583,7 @@ cr_cn<span class="p">(</span>dois<span class="o&qu <p>Would love any feedback...</p> - 2015-11-12 00:00:00 -0800 + 2015-11-12 00:00:00 +0100 http://recology.info//2015/11/pygbif/ @@ -481,7 +749,7 @@ ggplot<span class="p">(</span>res_all<span class=" </code></pre></div> <p><img src="/public/img/2015-10-21-noaa-isd/unnamed-chunk-12-1.png" alt="img"> </p> - 2015-10-21 00:00:00 -0700 + 2015-10-21 00:00:00 +0200 http://recology.info//2015/10/noaa-isd/ @@ -595,7 +863,7 @@ ggplot<span class="p">(</span>res_all<span class=" <p>I&#39;m sure I missed things. Let me know.</p> - 2015-10-19 00:00:00 -0700 + 2015-10-19 00:00:00 +0200 http://recology.info//2015/10/open-source-metrics/ @@ -658,7 +926,7 @@ creating a Digital Ocean account, authenticating, and have many examples.</p& <p>Let us know what you think. We&#39;d love to hear about any problems, use cases, feature requests. </p> - 2015-10-02 00:00:00 -0700 + 2015-10-02 00:00:00 +0200 http://recology.info//2015/10/analogsea-cran/ @@ -804,7 +1072,7 @@ creating a Digital Ocean account, authenticating, and have many examples.</p& <span class="c1">#&gt; .. ...</span> <span class="c1">#&gt; Variables not shown: setName (chr)</span> </code></pre></div> - 2015-09-11 00:00:00 -0700 + 2015-09-11 00:00:00 +0200 http://recology.info//2015/09/oai-client/ @@ -1231,7 +1499,7 @@ ggplot<span class="p">(</span>df<span class="p&quo </code></pre></div> <p><img src="/public/img/2015-08-07-full-text/unnamed-chunk-23-1.png" alt="plot of chunk unnamed-chunk-23"> </p> - 2015-08-07 00:00:00 -0700 + 2015-08-07 00:00:00 +0200 http://recology.info//2015/08/full-text/ @@ -1747,7 +2015,7 @@ sp<span class="o">::</span>plot<span class="p" </code></pre></div> <p><img src="/public/img/2015-07-07-weather-data-with-rnoaa/unnamed-chunk-19-1.png" alt="plot of chunk unnamed-chunk-19"> </p> - 2015-07-07 00:00:00 -0700 + 2015-07-07 00:00:00 +0200 http://recology.info//2015/07/weather-data-with-rnoaa/ @@ -2046,127 +2314,9 @@ ggplot<span class="p">(</span>df<span class="p&quo </code></pre></div> <p><img src="/public/img/2015-06-24-rerddap/unnamed-chunk-19-1.png" alt="plot of chunk unnamed-chunk-19"> </p> - 2015-06-24 00:00:00 -0700 + 2015-06-24 00:00:00 +0200 http://recology.info//2015/06/rerddap/ - - iDigBio - a new data source in spocc - <p><a href="https://www.idigbio.org/">iDigBio</a>, or <em>Integrated Digitized Biocollections</em>, collects and provides access to species occurrence data, and associated metadata (e.g., images of specimens, when provided). They collect data from <a href="https://www.idigbio.org/portal/publishers">a lot of different providers</a>. They have a nice web interface for searching, check out <a href="https://www.idigbio.org/portal/search">idigbio.org/portal/search</a>. </p> - -<p><code>spocc</code> is a package we&#39;ve been working on at <a href="http://ropensci.org/">rOpenSci</a> for a while now - it is a one stop shop for retrieving species ocurrence data. As new sources of species occurrence data come to our attention, and are available via a RESTful API, we incorporate them into <code>spocc</code>. 
</p> - -<p>I attended last week a <a href="https://github.com/idigbio-api-hackathon/HackathonCentral/">hackathon put on by iDigBio</a>. One of the projects I worked on was integrating iDigBio into <code>spocc</code>. </p> - -<p>With the addition of iDigBio, we now have in <code>spocc</code>:</p> - -<ul> -<li><a href="http://www.gbif.org/">GBIF</a></li> -<li><a href="http://www.inaturalist.org/">iNaturalist</a></li> -<li><a href="http://bison.usgs.ornl.gov/">USGS Bison</a></li> -<li><a href="http://ebird.org/content/ebird/">eBird</a></li> -<li><a href="https://ecoengine.berkeley.edu/">Ecoengine</a></li> -<li><a href="http://vertnet.org/">Vertnet</a></li> -<li><a href="https://www.idigbio.org/">iDigBio</a></li> -</ul> - -<p>The following is a quick demo of getting iDigBio data in <code>spocc</code></p> - -<h2>Install</h2> - -<p>Get updated versions of <code>rgbif</code> and <code>ridigbio</code> first. And get <code>leaflet</code> to make an interactive map.</p> -<div class="highlight"><pre><code class="language-r" data-lang="r">devtools<span class="o">::</span>install_github<span class="p">(</span><span class="s">&quot;ropensci/rgbif&quot;</span><span class="p">,</span> <span class="s">&quot;iDigBio/ridigbio&quot;</span><span class="p">,</span> <span class="s">&quot;rstudio/leaflet&quot;</span><span class="p">)</span> -devtools<span class="o">::</span>install_github<span class="p">(</span><span class="s">&quot;ropensci/spocc&quot;</span><span class="p">)</span> -</code></pre></div><div class="highlight"><pre><code class="language-r" data-lang="r"><span class="kn">library</span><span class="p">(</span><span class="s">&quot;spocc&quot;</span><span class="p">)</span> -</code></pre></div> -<h2>Use ridigbio - the R client for iDigBio</h2> -<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="kn">library</span><span class="p">(</span><span class="s">&quot;ridigbio&quot;</span><span class="p">)</span> -idig_search_records<span class="p">(</span>rq <span class="o">=</span> <span class="kt">list</span><span class="p">(</span>genus <span class="o">=</span> <span class="s">&quot;acer&quot;</span><span class="p">),</span> limit <span class="o">=</span> <span class="m">5</span><span class="p">)</span> -<span class="c1">#&gt; uuid</span> -<span class="c1">#&gt; 1 00041678-5df1-4a23-ba78-8c12f60af369</span> -<span class="c1">#&gt; 2 00072caf-0f24-447f-b68e-a20299f6afc7</span> -<span class="c1">#&gt; 3 000a6b9b-0bbd-46f6-82cb-848c30c46313</span> -<span class="c1">#&gt; 4 001d05e0-9c86-466d-957d-e73e2ce64fbe</span> -<span class="c1">#&gt; 5 0022a2da-bc97-4bef-b2a5-b8a9944fc677</span> -<span class="c1">#&gt; occurrenceid catalognumber family</span> -<span class="c1">#&gt; 1 urn:uuid:b275f928-5c0d-4832-ae82-fde363d8fde1 &lt;NA&gt; sapindaceae</span> -<span class="c1">#&gt; 2 40428b90-27a5-11e3-8d47-005056be0003 lsu00049997 aceraceae</span> -<span class="c1">#&gt; 3 02ca5aae-d8ab-492f-af10-e005b96c2295 191243 sapindaceae</span> -<span class="c1">#&gt; 4 urn:catalog:cas:ds:679715 ds679715 sapindaceae</span> -<span class="c1">#&gt; 5 b12bd651-2c6b-11e3-b3b8-180373cac83e 41898 sapindaceae</span> -<span class="c1">#&gt; genus scientificname country stateprovince geopoint.lat</span> -<span class="c1">#&gt; 1 acer acer rubrum united states illinois &lt;NA&gt;</span> -<span class="c1">#&gt; 2 acer acer negundo united states louisiana &lt;NA&gt;</span> -<span class="c1">#&gt; 3 acer &lt;NA&gt; united states new york &lt;NA&gt;</span> -<span class="c1">#&gt; 4 acer acer circinatum united 
states california 41.8714</span> -<span class="c1">#&gt; 5 acer acer rubrum united states maryland 39.4197222</span> -<span class="c1">#&gt; geopoint.lon datecollected collector</span> -<span class="c1">#&gt; 1 &lt;NA&gt; 1967-06-25T00:00:00+00:00 john e. ebinger</span> -<span class="c1">#&gt; 2 &lt;NA&gt; 1991-04-19T00:00:00+00:00 alan w. lievens</span> -<span class="c1">#&gt; 3 &lt;NA&gt; &lt;NA&gt; stephen f. hilfiker</span> -<span class="c1">#&gt; 4 -123.8503 1930-10-27T00:00:00+00:00 carl b. wolf</span> -<span class="c1">#&gt; 5 -77.1227778 1980-04-29T00:00:00+00:00 doweary, d.</span> -</code></pre></div> -<h2>Use spocc</h2> - -<h3>Scientific name search</h3> - -<p>Same search as above with <code>ridigbio</code></p> -<div class="highlight"><pre><code class="language-r" data-lang="r">occ<span class="p">(</span>query <span class="o">=</span> <span class="s">&quot;Acer&quot;</span><span class="p">,</span> from <span class="o">=</span> <span class="s">&quot;idigbio&quot;</span><span class="p">,</span> limit <span class="o">=</span> <span class="m">5</span><span class="p">)</span> -<span class="c1">#&gt; Searched: idigbio</span> -<span class="c1">#&gt; Occurrences - Found: 379, Returned: 5</span> -<span class="c1">#&gt; Search type: Scientific</span> -<span class="c1">#&gt; idigbio: Acer (5)</span> -</code></pre></div> -<h3>Geographic search</h3> - -<p>iDigBio uses Elasticsearch syntax to define a geographic search, but all you need to do is give a numeric vector of length 4 defining a bounding box, and you&#39;re good to go. </p> -<div class="highlight"><pre><code class="language-r" data-lang="r">bounds <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span><span class="m">-120</span><span class="p">,</span> <span class="m">40</span><span class="p">,</span> <span class="m">-100</span><span class="p">,</span> <span class="m">45</span><span class="p">)</span> -occ<span class="p">(</span>from <span class="o">=</span> <span class="s">&quot;idigbio&quot;</span><span class="p">,</span> geometry <span class="o">=</span> bounds<span class="p">,</span> limit <span class="o">=</span> <span class="m">10</span><span class="p">)</span> -<span class="c1">#&gt; Searched: idigbio</span> -<span class="c1">#&gt; Occurrences - Found: 346,737, Returned: 10</span> -<span class="c1">#&gt; Search type: Geometry</span> -</code></pre></div> -<h3>W/ or W/O Coordinates</h3> - -<p>Don&#39;t pass <code>has_coords</code> (gives data w/ and w/o coordinates data)</p> -<div class="highlight"><pre><code class="language-r" data-lang="r">occ<span class="p">(</span>query <span class="o">=</span> <span class="s">&quot;Acer&quot;</span><span class="p">,</span> from <span class="o">=</span> <span class="s">&quot;idigbio&quot;</span><span class="p">,</span> limit <span class="o">=</span> <span class="m">5</span><span class="p">)</span> -<span class="c1">#&gt; Searched: idigbio</span> -<span class="c1">#&gt; Occurrences - Found: 379, Returned: 5</span> -<span class="c1">#&gt; Search type: Scientific</span> -<span class="c1">#&gt; idigbio: Acer (5)</span> -</code></pre></div> -<p>Only records with coordinates data</p> -<div class="highlight"><pre><code class="language-r" data-lang="r">occ<span class="p">(</span>query <span class="o">=</span> <span class="s">&quot;Acer&quot;</span><span class="p">,</span> from <span class="o">=</span> <span class="s">&quot;idigbio&quot;</span><span class="p">,</span> limit <span class="o">=</span> <span class="m">5</span><span class="p">,</span> has_coords <span 
class="o">=</span> <span class="kc">TRUE</span><span class="p">)</span> -<span class="c1">#&gt; Searched: idigbio</span> -<span class="c1">#&gt; Occurrences - Found: 16, Returned: 5</span> -<span class="c1">#&gt; Search type: Scientific</span> -<span class="c1">#&gt; idigbio: Acer (5)</span> -</code></pre></div> -<p>Only records without coordinates data</p> -<div class="highlight"><pre><code class="language-r" data-lang="r">occ<span class="p">(</span>query <span class="o">=</span> <span class="s">&quot;Acer&quot;</span><span class="p">,</span> from <span class="o">=</span> <span class="s">&quot;idigbio&quot;</span><span class="p">,</span> limit <span class="o">=</span> <span class="m">5</span><span class="p">,</span> has_coords <span class="o">=</span> <span class="kc">FALSE</span><span class="p">)</span> -<span class="c1">#&gt; Searched: idigbio</span> -<span class="c1">#&gt; Occurrences - Found: 363, Returned: 5</span> -<span class="c1">#&gt; Search type: Scientific</span> -<span class="c1">#&gt; idigbio: Acer (5)</span> -</code></pre></div> -<h3>Make an interactive map</h3> -<div class="highlight"><pre><code class="language-r" data-lang="r"><span class="kn">library</span><span class="p">(</span><span class="s">&quot;leaflet&quot;</span><span class="p">)</span> -bounds <span class="o">&lt;-</span> <span class="kt">c</span><span class="p">(</span><span class="m">-120</span><span class="p">,</span> <span class="m">40</span><span class="p">,</span> <span class="m">-100</span><span class="p">,</span> <span class="m">45</span><span class="p">)</span> -leaflet<span class="p">(</span>data <span class="o">=</span> dat<span class="p">)</span> <span class="o">%&gt;%</span> - addTiles<span class="p">()</span> <span class="o">%&gt;%</span> - addMarkers<span class="p">(</span><span class="o">~</span>longitude<span class="p">,</span> <span class="o">~</span>latitude<span class="p">,</span> popup <span class="o">=</span> <span class="o">~</span>name<span class="p">)</span> <span class="o">%&gt;%</span> - addRectangles<span class="p">(</span> - lng1 <span class="o">=</span> bounds<span class="p">[</span><span class="m">1</span><span class="p">],</span> lat1 <span class="o">=</span> bounds<span class="p">[</span><span class="m">4</span><span class="p">],</span> - lng2 <span class="o">=</span> bounds<span class="p">[</span><span class="m">3</span><span class="p">],</span> lat2 <span class="o">=</span> bounds<span class="p">[</span><span class="m">2</span><span class="p">],</span> - fillColor <span class="o">=</span> <span class="s">&quot;transparent&quot;</span> - <span class="p">)</span> -</code></pre></div> -<p><img src="/public/img/2015-06-08-idigbio-in-spocc/plot.png" alt="image"></p> - - 2015-06-08 00:00:00 -0700 - http://recology.info//2015/06/idigbio-in-spocc/ - - diff --git a/_site/index.html b/_site/index.html index 65ea8be343..59a6b0e8ec 100644 --- a/_site/index.html +++ b/_site/index.html @@ -59,6 +59,279 @@

    Recology

      +
    +

    + + binomen - Tools for slicing and dicing taxonomic names + +

    + + + +

    The first version of binomen is now up on CRAN. It provides various taxonomic classes for defining a single taxon, multiple taxa, and a taxonomic data.frame. It is designed as a companion to taxize, where you can get taxonomic data on taxonomic names from the web.

    + +

    The classes (S3):

    + +
      +
    • taxon
    • +
    • taxonref
    • +
    • taxonrefs
    • +
    • binomial
    • +
    • grouping (i.e., classification - used different term to avoid conflict with classification in taxize)
    • +
    + +

    For example, the binomial class is defined by a genus, epithet, authority, and optional full species name and canonical version.

    +
    binomial("Poa", "annua", authority="L.")
    +
    <binomial>
    +  genus: Poa
    +  epithet: annua
    +  canonical:
    +  species:
    +  authority: L.
    +
    +

    The package has a suite of functions to work on these taxonomic classes:

    + +
      +
    • gethier() - get hierarchy from a taxon class
    • +
    • scatter() - make each row in taxonomic data.frame (taxondf) a separate taxon object within a single taxa object
    • +
    • assemble() - make a taxa object into a taxondf data.frame
    • +
    • pick() - pick out one or more taxonomic groups
    • +
    • pop() - pop out (drop) one or more taxonomic groups
    • +
    • span() - pick a range between two taxonomic groups (inclusive)
    • +
    • strain() - filter by taxonomic groups, like dplyr's filter
    • +
    • name() - get the taxon name for each taxonref object
    • +
    • uri() - get the reference uri for each taxonref object
    • +
    • rank() - get the taxonomic rank for each taxonref object
    • +
    • id() - get the identifier for each taxonref object
    • +
    + +

    The approach in this package I suppose is sort of like split-apply-combine from plyr/dplyr, whereas this aims to make it easy to do the same with taxonomic names.

    + +

    Install

    + +

    For the examples below, install the package from CRAN:

    +
    install.packages("binomen")
    +
    library("binomen")
    +
    +

    Make a taxon

    + +

    Make a taxon object

    +
    (obj <- make_taxon(genus="Poa", epithet="annua", authority="L.",
    +  family='Poaceae', clazz='Poales', kingdom='Plantae', variety='annua'))
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     kingdom: Plantae
    +#>     clazz: Poales
    +#>     family: Poaceae
    +#>     genus: Poa
    +#>     species: Poa annua
    +#>     variety: annua
    +
    +

    Index to various parts of the object

    + +

    The binomial

    +
    obj$binomial
    +#> <binomial>
    +#>   genus: Poa
    +#>   epithet: annua
    +#>   canonical: Poa annua
    +#>   species: Poa annua L.
    +#>   authority: L.
    +
    +

    The authority

    +
    obj$binomial$authority
    +#> [1] "L."
    +
    +

    The classification

    +
    obj$grouping
    +#> <grouping>
    +#>   kingdom: Plantae
    +#>   clazz: Poales
    +#>   family: Poaceae
    +#>   genus: Poa
    +#>   species: Poa annua
    +#>   variety: annua
    +
    +

    The family

    +
    obj$grouping$family
    +#> <taxonref>
    +#>   rank: family
    +#>   name: Poaceae
    +#>   id: none
    +#>   uri: none
    +
    +
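    The taxonref helpers listed above (name(), rank(), id(), uri()) aren't demonstrated in this post; a minimal sketch of how they might be applied to a single taxonref like the family above (usage assumed from the function descriptions, not taken from the original examples):

    rank(obj$grouping$family)   # presumably "family"
    name(obj$grouping$family)   # presumably "Poaceae"
    id(obj$grouping$family)     # presumably "none", since no id was given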

    Subset taxon objects

    + +

    Get one or more ranks via pick()

    +
    obj %>% pick(family)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     family: Poaceae
    +obj %>% pick(family, genus)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     family: Poaceae
    +#>     genus: Poa
    +
    +

    Drop one or more ranks via pop()

    +
    obj %>% pop(family)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     kingdom: Plantae
    +#>     clazz: Poales
    +#>     genus: Poa
    +#>     species: Poa annua
    +#>     variety: annua
    +obj %>% pop(family, genus)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     kingdom: Plantae
    +#>     clazz: Poales
    +#>     species: Poa annua
    +#>     variety: annua
    +
    +

    Get a range of ranks via span()

    +
    obj %>% span(kingdom, family)
    +#> <taxon>
    +#>   binomial: Poa annua
    +#>   grouping: 
    +#>     kingdom: Plantae
    +#>     clazz: Poales
    +#>     family: Poaceae
    +
    +

    Extract classification as a data.frame

    +
    gethier(obj)
    +#>      rank      name
    +#> 1 kingdom   Plantae
    +#> 2   clazz    Poales
    +#> 3  family   Poaceae
    +#> 4   genus       Poa
    +#> 5 species Poa annua
    +#> 6 variety     annua
    +
    +

    Taxonomic data.frames

    + +

    Make one

    +
    df <- data.frame(order = c('Asterales','Asterales','Fagales','Poales','Poales','Poales'),
    +  family = c('Asteraceae','Asteraceae','Fagaceae','Poaceae','Poaceae','Poaceae'),
    +  genus = c('Helianthus','Helianthus','Quercus','Poa','Festuca','Holodiscus'),
    +  stringsAsFactors = FALSE)
    +(df2 <- taxon_df(df))
    +#>       order     family      genus
    +#> 1 Asterales Asteraceae Helianthus
    +#> 2 Asterales Asteraceae Helianthus
    +#> 3   Fagales   Fagaceae    Quercus
    +#> 4    Poales    Poaceae        Poa
    +#> 5    Poales    Poaceae    Festuca
    +#> 6    Poales    Poaceae Holodiscus
    +
    +

    Parse - get rank order via pick()

    +
    df2 %>% pick(order)
    +#>       order
    +#> 1 Asterales
    +#> 2 Asterales
    +#> 3   Fagales
    +#> 4    Poales
    +#> 5    Poales
    +#> 6    Poales
    +
    +

    get ranks order, family, and genus via pick()

    +
    df2 %>% pick(order, family, genus)
    +#>       order     family      genus
    +#> 1 Asterales Asteraceae Helianthus
    +#> 2 Asterales Asteraceae Helianthus
    +#> 3   Fagales   Fagaceae    Quercus
    +#> 4    Poales    Poaceae        Poa
    +#> 5    Poales    Poaceae    Festuca
    +#> 6    Poales    Poaceae Holodiscus
    +
    +
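    pop() isn't shown on a taxonomic data.frame in this post; a hedged sketch, assuming it drops rank columns from a taxondf the same way it drops ranks from a taxon object earlier:

    df2 %>% pop(family)   # hypothetical: drop the family column, keeping order and genus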

    get range of names via span(), from rank X to rank Y

    +
    df2 %>% span(family, genus)
    +#>       family      genus
    +#> 1 Asteraceae Helianthus
    +#> 2 Asteraceae Helianthus
    +#> 3   Fagaceae    Quercus
    +#> 4    Poaceae        Poa
    +#> 5    Poaceae    Festuca
    +#> 6    Poaceae Holodiscus
    +
    +
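    strain(), described above as working like dplyr's filter, also isn't demonstrated; a minimal sketch, assuming it takes filter-style expressions on rank columns (hypothetical usage):

    df2 %>% strain(order == "Poales")   # hypothetical: keep only the Poales rows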

    Separate each row into a taxon class (a collection of taxon objects makes up a taxa class)

    +
    scatter(df2)
    +#> [[1]]
    +#> <taxon>
    +#>   binomial: Helianthus none
    +#>   grouping: 
    +#>     order: Asterales
    +#>     family: Asteraceae
    +#>     genus: Helianthus
    +#>     species: Helianthus none
    +#> 
    +#> [[2]]
    +#> <taxon>
    +#>   binomial: Helianthus none
    +#>   grouping: 
    +#>     order: Asterales
    +#>     family: Asteraceae
    +#>     genus: Helianthus
    +#>     species: Helianthus none
    +#> 
    +#> [[3]]
    +#> <taxon>
    +#>   binomial: Quercus none
    +#>   grouping: 
    +#>     order: Fagales
    +#>     family: Fagaceae
    +#>     genus: Quercus
    +#>     species: Quercus none
    +#> 
    +#> [[4]]
    +#> <taxon>
    +#>   binomial: Poa none
    +#>   grouping: 
    +#>     order: Poales
    +#>     family: Poaceae
    +#>     genus: Poa
    +#>     species: Poa none
    +#> 
    +#> [[5]]
    +#> <taxon>
    +#>   binomial: Festuca none
    +#>   grouping: 
    +#>     order: Poales
    +#>     family: Poaceae
    +#>     genus: Festuca
    +#>     species: Festuca none
    +#> 
    +#> [[6]]
    +#> <taxon>
    +#>   binomial: Holodiscus none
    +#>   grouping: 
    +#>     order: Poales
    +#>     family: Poaceae
    +#>     genus: Holodiscus
    +#>     species: Holodiscus none
    +#> 
    +#> attr(,"class")
    +#> [1] "taxa"
    +
    +

    And you can re-assemble a data.frame from the output of scatter() with assemble()

    +
    out <- scatter(df2)
    +assemble(out)
    +#>       order     family      genus         species
    +#> 1 Asterales Asteraceae Helianthus Helianthus none
    +#> 2 Asterales Asteraceae Helianthus Helianthus none
    +#> 3   Fagales   Fagaceae    Quercus    Quercus none
    +#> 4    Poales    Poaceae        Poa        Poa none
    +#> 5    Poales    Poaceae    Festuca    Festuca none
    +#> 6    Poales    Poaceae Holodiscus Holodiscus none
    +
    +
    +

    @@ -384,177 +657,6 @@

    feedback

    -
    -

    - - noaa - Integrated Surface Database data - -

    - - - -

    I've recently made some improvements to the functions that work with ISD -(Integrated Surface Database) data.

    - -

    isd data

    - -
      -
    • The isd() function now caches more intelligently. We now cache using -.rds files via saveRDS/readRDS, whereas we used to use .csv files, -which take up much more disk space, and we have to worry about not changing -data formats on reading data back into an R session. This has the downside -that you can't just go directly to open up a cached file in your favorite -spreadsheet viewer, but you can do that manually after reading in to R.
    • -
• In addition, isd() now has a cleanup parameter: if TRUE, after downloading the data file from NOAA's FTP server and processing it, we delete the original file. That's fine since we have the cached processed file, but you can choose not to clean up the original data files (see the sketch after this list).
    • -
• Data processing in isd() is improved as well. We convert key variables to appropriate classes to be more useful.
    • -
    - -
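A minimal sketch of how the caching and cleanup behavior described above might be used, assuming the cleanup argument mentioned in the list and the usaf/wban/year arguments shown later in this post:

```r
library("rnoaa")

# keep the raw file downloaded from NOAA's FTP server instead of
# removing it after the processed copy has been cached
res <- isd(usaf = "011690", wban = "99999", year = 1993, cleanup = FALSE)

# a repeat call for the same station and year should read the cached
# .rds file rather than downloading and processing the data again
res_again <- isd(usaf = "011690", wban = "99999", year = 1993)
```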

    isd stations

    - -
      -
• In isd_stations(), there's now a cached version of the station data in the package, or you can optionally get fresh station data from NOAA's FTP server.
    • -
• There's a new function isd_stations_search() that uses the station data to allow you to search for stations via either:
        -
      • A bounding box
      • -
• Radius from a point
      • -
    • -
    - -

    Install

    - -

    For examples below, you'll need the development version:

    -
    devtools::install_github("ropensci/rnoaa")
    -
    -

    Load rnoaa

    -
    library("rnoaa")
    -
    -

    ISD stations

    - -

    Get stations

    - -

There's a cached version of the station data in the package, or you can get fresh station data from NOAA's FTP server.

    -
    stations <- isd_stations()
    -head(stations)
    -#>   usaf  wban station_name ctry state icao lat lon elev_m    begin      end
    -#> 1 7005 99999   CWOS 07005                  NA  NA     NA 20120127 20120127
    -#> 2 7011 99999   CWOS 07011                  NA  NA     NA 20111025 20121129
    -#> 3 7018 99999   WXPOD 7018                   0   0   7018 20110309 20130730
    -#> 4 7025 99999   CWOS 07025                  NA  NA     NA 20120127 20120127
    -#> 5 7026 99999   WXPOD 7026   AF              0   0   7026 20120713 20141120
    -#> 6 7034 99999   CWOS 07034                  NA  NA     NA 20121024 20121106
    -
    -

    Filter and visualize stations

    - -

In addition to getting the entire station data.frame, you can also search for stations, either with a bounding box or within a radius from a point. First, the bounding box:

    -
    bbox <- c(-125.0, 38.4, -121.8, 40.9)
    -out <- isd_stations_search(bbox = bbox)
    -head(out)
    -#>     usaf  wban                          station_name ctry state icao
    -#> 1 720193 99999 LONNIE POOL FLD / WEAVERVILLE AIRPORT   US    CA KO54
    -#> 2 724834 99999                        POINT CABRILLO   US    CA     
    -#> 3 724953 99999                              RIO NIDO   US    CA     
    -#> 4 724957 23213                 SONOMA COUNTY AIRPORT   US    CA KSTS
    -#> 5 724957 99999                  C M SCHULZ SONOMA CO   US    CA KSTS
    -#> 6 724970 99999                  CHICO CALIFORNIA MAP   US    CA  CIC
    -#>   elev_m    begin      end      lon    lat
    -#> 1  716.0 20101030 20150831 -122.922 40.747
    -#> 2   20.0 19810906 19871007 -123.820 39.350
    -#> 3 -999.0 19891111 19900303 -122.917 38.517
    -#> 4   34.8 20000101 20150831 -122.810 38.504
    -#> 5   38.0 19430404 19991231 -122.817 38.517
    -#> 6   69.0 19420506 19760305 -121.850 39.783
    -
    -

Where is the bounding box? (you'll need lawn, or you can visualize some other way)

    -
    library("lawn")
    -lawn::lawn_bbox_polygon(bbox) %>% view
    -
    -

    plot1

    - -

Visualize the station subset - yep, looks right

    -
    library("leaflet")
    -leaflet(data = out) %>%
    -  addTiles() %>%
    -  addCircles()
    -
    -

    plot1

    - -

Next, search with a lat/lon coordinate and a radius. That is, we search for stations within X km of the coordinate.

    -
    out <- isd_stations_search(lat = 38.4, lon = -123, radius = 250)
    -head(out)
    -#>     usaf  wban             station_name ctry state icao elev_m    begin
    -#> 1 690070 93217            FRITZSCHE AAF   US    CA KOAR   43.0 19600404
    -#> 2 720267 23224 AUBURN MUNICIPAL AIRPORT   US    CA KAUN  466.7 20060101
    -#> 3 720267 99999         AUBURN MUNICIPAL   US    CA KAUN  468.0 20040525
    -#> 4 720406 99999      GNOSS FIELD AIRPORT   US    CA KDVO    0.6 20071114
    -#> 5 720576   174       UNIVERSITY AIRPORT   US    CA KEDU   21.0 20130101
    -#> 6 720576 99999                    DAVIS   US    CA KEDU   21.0 20080721
    -#>        end      lon    lat
    -#> 1 19930831 -121.767 36.683
    -#> 2 20150831 -121.082 38.955
    -#> 3 20051231 -121.082 38.955
    -#> 4 20150831 -122.550 38.150
    -#> 5 20150831 -121.783 38.533
    -#> 6 20121231 -121.783 38.533
    -
    -

    Again, compare search area to stations found

    - -

    search area

    -
    pt <- lawn::lawn_point(c(-123, 38.4))
    -lawn::lawn_buffer(pt, dist = 250) %>% view
    -
    -

    plot1

    - -

    stations found

    -
    leaflet(data = out) %>%
    -  addTiles() %>%
    -  addCircles()
    -
    -

    plot1

    - -

    ISD data

    - -

    Get ISD data

    - -

    Here, I get data for four stations.

    -
    res1 <- isd(usaf="011690", wban="99999", year=1993)
    -res2 <- isd(usaf="172007", wban="99999", year=2015)
    -res3 <- isd(usaf="702700", wban="00489", year=2015)
    -res4 <- isd(usaf="109711", wban=99999, year=1970)
    -
    -

Then, combine the data with rnoaa:::rbind.isd()

    -
    res_all <- rbind(res1, res2, res3, res4)
    -
    -

    Add date time

    -
    library("lubridate")
    -res_all$date_time <- ymd_hm(
    -  sprintf("%s %s", as.character(res_all$date), res_all$time)
    -)
    -
    -

    Remove 999's (NOAA's way to indicate missing/no data)

    -
    library("dplyr")
    -res_all <- res_all %>% filter(temperature < 900)
    -
    -

    Visualize ISD data

    -
    library("ggplot2")
    -ggplot(res_all, aes(date_time, temperature)) +
    -  geom_line() + 
    -  facet_wrap(~usaf_station, scales = "free_x")
    -
    -

    img

    - -
    -
    diff --git a/_site/page10/index.html b/_site/page10/index.html index 5fc04e1d0b..936b67ded9 100644 --- a/_site/page10/index.html +++ b/_site/page10/index.html @@ -59,6 +59,172 @@

    Recology

      +
    +

    + + pytaxize - low level ITIS functions + +

    + + + + I've been working on a Python port of the R package `taxize` that I maintain. It's still early days with this Python library, I'd love to know what people think. For example, I'm giving back Pandas DataFrame's from most functions. Does this make sense? + +## Installation + +``` +sudo pip install git+git://github.com/sckott/pytaxize.git#egg=pytaxize +``` + +Or `git clone` the repo down, and `python setup.py build && python setup.py install` + +## Load library + +```python +import pytaxize +``` + +## ITIS ping + +```python +pytaxize.itis_ping() +``` + +```python +'This is the ITIS Web Service, providing access to the data behind www.itis.gov. The database contains 665,266 scientific names (501,207 of them valid/accepted) and 122,735 common names.' +``` + +## Get hierarchy down from tsn + +```python +pytaxize.gethierarchydownfromtsn(tsn = 161030) +``` + +```python + tsn rankName taxonName parentName parentTsn +0 161048 Class Sarcopterygii Osteichthyes 161030 +1 161061 Class Actinopterygii Osteichthyes 161030 +``` + +## Get hierarchy up from tsn + +```python +pytaxize.gethierarchyupfromtsn(tsn = 37906) +``` + +```python + author parentName parentTsn rankName taxonName tsn +0 Gaertn. ex Schreb. Asteraceae 35420 Genus Liatris 37906 +``` + +## Get rank names + +```python +pytaxize.getranknames() +``` + +```python + kingdomname rankid rankname +0 Bacteria 10 Kingdom +1 Bacteria 20 Subkingdom +2 Bacteria 30 Phylum +3 Bacteria 40 Subphylum +4 Bacteria 50 Superclass +5 Bacteria 60 Class +6 Bacteria 70 Subclass +7 Bacteria 80 Infraclass +8 Bacteria 90 Superorder +9 Bacteria 100 Order +10 Bacteria 110 Suborder +11 Bacteria 120 Infraorder +12 Bacteria 130 Superfamily +13 Bacteria 140 Family +14 Bacteria 150 Subfamily +15 Bacteria 160 Tribe +16 Bacteria 170 Subtribe +17 Bacteria 180 Genus +18 Bacteria 190 Subgenus +19 Bacteria 220 Species +20 Bacteria 230 Subspecies +21 Protozoa 10 Kingdom +22 Protozoa 20 Subkingdom +23 Protozoa 25 Infrakingdom +24 Protozoa 30 Phylum +25 Protozoa 40 Subphylum +26 Protozoa 45 Infraphylum +27 Protozoa 47 Parvphylum +28 Protozoa 50 Superclass +29 Protozoa 60 Class +.. ... ... ... 
+150 Chromista 190 Subgenus +151 Chromista 200 Section +152 Chromista 210 Subsection +153 Chromista 220 Species +154 Chromista 230 Subspecies +155 Chromista 240 Variety +156 Chromista 250 Subvariety +157 Chromista 260 Form +158 Chromista 270 Subform +159 Archaea 10 Kingdom +160 Archaea 20 Subkingdom +161 Archaea 30 Phylum +162 Archaea 40 Subphylum +163 Archaea 50 Superclass +164 Archaea 60 Class +165 Archaea 70 Subclass +166 Archaea 80 Infraclass +167 Archaea 90 Superorder +168 Archaea 100 Order +169 Archaea 110 Suborder +170 Archaea 120 Infraorder +171 Archaea 130 Superfamily +172 Archaea 140 Family +173 Archaea 150 Subfamily +174 Archaea 160 Tribe +175 Archaea 170 Subtribe +176 Archaea 180 Genus +177 Archaea 190 Subgenus +178 Archaea 220 Species +179 Archaea 230 Subspecies +``` + +## Search by scientific name + +```python +pytaxize.searchbyscientificname(x="Tardigrada") +``` + +```python + combinedname tsn +0 Rotaria tardigrada 58274 +1 Notommata tardigrada 58898 +2 Pilargis tardigrada 65562 +3 Tardigrada 155166 +4 Heterotardigrada 155167 +5 Arthrotardigrada 155168 +6 Mesotardigrada 155358 +7 Eutardigrada 155362 +8 Scytodes tardigrada 866744 +``` + +## Get accepted names from tsn + +```python +pytaxize.getacceptednamesfromtsn('208527') +``` + +If accepted, returns the same id + +```python +'208527' +``` + +## More + +For the other functions see https://github.com/sckott/pytaxize/blob/master/pytaxize/itis.py + +
    +

    @@ -68,111 +234,131 @@

    -

I was in San Francisco last week for an altmetrics conference at PLOS. While there, I visited the Asian Art Museum, just for the Roads of Arabia exhibition.

    - -

    It was a great exhibit. While I was looking at the pieces, I read many labels, and thought, "hey, what if someone wants this metadata"?

    - -

    Since we have an R package in development for scraping museum metadata (called musemeta), I just started some scraping code for this museum. Unfortunately, I don't think the pieces from the Roads of Arabia exhibit are on their site, so no metadata to get. But they do have their main collection searchable online at http://www.asianart.org/collections/collection. Examples follow.

    - -

    Installation

    -
    install.packages("devtools")
    -devtools::install_github("ropensci/musemeta")
    -
    library("musemeta")
    -
    -

    Get metadata for a single object

    - -

You have to get the ID for the piece from their website, e.g., 11462 from the url http://searchcollection.asianart.org/view/objects/asitem/nid/11462. Once you have an ID you can pass it to the aam() function.

    -
    (out <- aam(11462))
    -#> <AAM metadata> Molded plaque (tsha tsha)
    -#>   Object id: 1992.96
    -#>   Object name: Votive plaque
    -#>   Date: approx. 1992
    -#>   Artist: 
    -#>   Medium: Plaster mixed with resin and pigment
    -#>   Credit line: Gift of Robert Tevis
    -#>   On display?: no
    -#>   Collection: Decorative Arts
    -#>   Department: Himalayan Art
    -#>   Dimensions: 
    -#>   Label: Molded plaques (tsha tshas) are small sacred images, flat or
    -#>           three-dimensional, shaped out of clay in metal molds. The
    -#>           images are usually unbaked, and sometimes seeds, paper, or
    -#>           human ashes were mixed with the clay. Making tsha tshas is
    -#>           a meritorious act, and monasteries give them away to
    -#>           pilgrims. Some Tibetans carry tsha tshas inside the amulet
    -#>           boxes they wear or stuff them into larger images as part of
    -#>           the consecration of those images. In Bhutan tsha tshas are
    -#>           found in mani walls (a wall of stones carved with prayers)
    -#>           or piled up in caves.The practice of making such plaques
    -#>           began in India, and from there it spread to other countries
    -#>           in Asia with the introduction of Buddhism. Authentic tsha
    -#>           tshas are cast from clay. Modern examples , such as those
    -#>           made for the tourist trade in Tibet, are made of plaster
    -#>           and cast from ancient (1100-1200) molds and hand colored to
    -#>           give them the appearance of age.
    -
    -

    The output is printed for clarity, but you can dig into each element, like

    -
    out$label
    -#> [1] "Molded plaques (tsha tshas) are small sacred images, flat or three-dimensional, shaped out of clay in metal molds. The images are usually unbaked, and sometimes seeds, paper, or human ashes were mixed with the clay. Making tsha tshas is a meritorious act, and monasteries give them away to pilgrims. Some Tibetans carry tsha tshas inside the amulet boxes they wear or stuff them into larger images as part of the consecration of those images. In Bhutan tsha tshas are found in mani walls (a wall of stones carved with prayers) or piled up in caves.The practice of making such plaques began in India, and from there it spread to other countries in Asia with the introduction of Buddhism. Authentic tsha tshas are cast from clay. Modern examples , such as those made for the tourist trade in Tibet, are made of plaster and cast from ancient (1100-1200) molds and hand colored to give them the appearance of age."
    -
    -

    Get metadata for many objects

    - -

    The aam() function is not vectorized, but you can easily get data for many IDs via lapply type functions, etc.

    -
    lapply(c(17150,17140,17144), aam)
    -#> [[1]]
    -#> <AAM metadata> Boys sumo wrestling
    -#>   Object id: 2005.100.35
    -#>   Object name: Woodblock print
    -#>   Date: approx. 1769
    -#>   Artist: Suzuki HarunobuJapanese, 1724 - 1770
    -#>   Medium: Ink and colors on paper
    -#>   Credit line: Gift of the Grabhorn Ukiyo-e Collection
    -#>   On display?: no
    -#>   Collection: Prints And Drawings
    -#>   Department: Japanese Art
    -#>   Dimensions: H. 12 5/8 in x W. 5 3/4 in, H. 32.1 cm x W. 14.6 cm
    -#>   Label: 40 —é木–Ø春t信M 相'Š撲–o—Vび‚ÑSuzuki Harunobu, 1725?–1770Boys sumo wrestling ( Sumō
    -#>           ?)c. 1769Woodblock print ( nishiki-e) Hosoban
    -#> 
    -#> [[2]]
    -#> <AAM metadata> Autumn Moon of Matsukaze
    -#>   Object id: 2005.100.25
    -#>   Object name: Woodblock print
    -#>   Date: 1768-1769
    -#>   Artist: Suzuki HarunobuJapanese, 1724 - 1770
    -#>   Medium: Ink and colors on paper
    -#>   Credit line: Gift of the Grabhorn Ukiyo-e Collection
    -#>   On display?: no
    -#>   Collection: Prints And Drawings
    -#>   Department: Japanese Art
    -#>   Dimensions: H. 12 1/2 in x W. 5 3/4 in, H. 31.7 cm x W. 14.6 cm
    -#>   Label: 30 —é木–Ø春t信M 『w•—流—¬æ…八"ª景Œi』x 「u松¼•—の‚Ì秋H月ŒŽ」vSuzuki Harunobu, 1725?–1770"Autumn Moon of
    -#>           Matsukaze" (Matsukaze no shū ?)From Fashionable Eight Views
    -#>           of Noh Chants (Fū ?ū ?1768–1769Woodblock print
    -#>           (nishiki-e)Hosoban
    -#> 
    -#> [[3]]
    -#> <AAM metadata> Hunting for fireflies
    -#>   Object id: 2005.100.29
    -#>   Object name: Woodblock print
    -#>   Date: 1767-1768
    -#>   Artist: Suzuki HarunobuJapanese, 1724 - 1770
    -#>   Medium: Ink and colors on paper
    -#>   Credit line: Gift of the Grabhorn Ukiyo-e Collection
    -#>   On display?: no
    -#>   Collection: Prints And Drawings
    -#>   Department: Japanese Art
    -#>   Dimensions: H. 10 1/2 in x W. 8 in, H. 26.7 cm x W. 20.3 cm
    -#>   Label: 34 —é木–Ø春t信M Œu狩Žëり‚èSuzuki Harunobu, 1725?–1770Hunting for
    -#>           fireflies1767–1768Woodblock print ( nishiki-e) Chū ?
    -
    -

    No search, boo

    - -

    Note that there is no search functionality yet for this source. Maybe someone can add that via pull requests :)

    - -

    Like the others

    - -

The other sources in musemeta mostly work the same way as the above.

    + I was in San Francisco last week for an altmetrics conference at PLOS. While there, I visited the [Asian Art Museum](http://www.asianart.org/), just the [Roads of Arabia exhibition](http://www.asianart.org/exhibitions_index/roads-of-arabia). + +It was a great exhibit. While I was looking at the pieces, I read many labels, and thought, "hey, what if someone wants this metadata"? + +Since we have an R package in development for scraping museum metadata (called [musemeta](https://github.com/ropensci/musemeta)), I just started some scraping code for this museum. Unfortunately, I don't think the pieces from the Roads of Arabia exhibit are on their site, so no metadata to get. But they do have their main collection searchable online at [http://www.asianart.org/collections/collection](http://www.asianart.org/collections/collection). Examples follow. + +## Installation + + +```r +install.packages("devtools") +devtools::install_github("ropensci/musemeta") +``` + + +```r +library("musemeta") +``` + +## Get metadata for a single object + +You have to get the ID for the piece from their website, e.g., `11462` from the url `http://searchcollection.asianart.org/view/objects/asitem/nid/11462`. Once you have an ID you can pass it in ot the `aam()` function. + + +```r +(out <- aam(11462)) +#> Molded plaque (tsha tsha) +#> Object id: 1992.96 +#> Object name: Votive plaque +#> Date: approx. 1992 +#> Artist: +#> Medium: Plaster mixed with resin and pigment +#> Credit line: Gift of Robert Tevis +#> On display?: no +#> Collection: Decorative Arts +#> Department: Himalayan Art +#> Dimensions: +#> Label: Molded plaques (tsha tshas) are small sacred images, flat or +#> three-dimensional, shaped out of clay in metal molds. The +#> images are usually unbaked, and sometimes seeds, paper, or +#> human ashes were mixed with the clay. Making tsha tshas is +#> a meritorious act, and monasteries give them away to +#> pilgrims. Some Tibetans carry tsha tshas inside the amulet +#> boxes they wear or stuff them into larger images as part of +#> the consecration of those images. In Bhutan tsha tshas are +#> found in mani walls (a wall of stones carved with prayers) +#> or piled up in caves.The practice of making such plaques +#> began in India, and from there it spread to other countries +#> in Asia with the introduction of Buddhism. Authentic tsha +#> tshas are cast from clay. Modern examples , such as those +#> made for the tourist trade in Tibet, are made of plaster +#> and cast from ancient (1100-1200) molds and hand colored to +#> give them the appearance of age. +``` + +The output is printed for clarity, but you can dig into each element, like + + +```r +out$label +#> [1] "Molded plaques (tsha tshas) are small sacred images, flat or three-dimensional, shaped out of clay in metal molds. The images are usually unbaked, and sometimes seeds, paper, or human ashes were mixed with the clay. Making tsha tshas is a meritorious act, and monasteries give them away to pilgrims. Some Tibetans carry tsha tshas inside the amulet boxes they wear or stuff them into larger images as part of the consecration of those images. In Bhutan tsha tshas are found in mani walls (a wall of stones carved with prayers) or piled up in caves.The practice of making such plaques began in India, and from there it spread to other countries in Asia with the introduction of Buddhism. Authentic tsha tshas are cast from clay. 
Modern examples , such as those made for the tourist trade in Tibet, are made of plaster and cast from ancient (1100-1200) molds and hand colored to give them the appearance of age." +``` + +## Get metadata for many objects + +The `aam()` function is not vectorized, but you can easily get data for many IDs via `lapply` type functions, etc. + + +```r +lapply(c(17150,17140,17144), aam) +#> [[1]] +#> Boys sumo wrestling +#> Object id: 2005.100.35 +#> Object name: Woodblock print +#> Date: approx. 1769 +#> Artist: Suzuki HarunobuJapanese, 1724 - 1770 +#> Medium: Ink and colors on paper +#> Credit line: Gift of the Grabhorn Ukiyo-e Collection +#> On display?: no +#> Collection: Prints And Drawings +#> Department: Japanese Art +#> Dimensions: H. 12 5/8 in x W. 5 3/4 in, H. 32.1 cm x W. 14.6 cm +#> Label: 40 —é木–Ø春t信M 相'Š撲–o—Vび‚ÑSuzuki Harunobu, 1725?–1770Boys sumo wrestling ( Sumō +#> ?)c. 1769Woodblock print ( nishiki-e) Hosoban +#> +#> [[2]] +#> Autumn Moon of Matsukaze +#> Object id: 2005.100.25 +#> Object name: Woodblock print +#> Date: 1768-1769 +#> Artist: Suzuki HarunobuJapanese, 1724 - 1770 +#> Medium: Ink and colors on paper +#> Credit line: Gift of the Grabhorn Ukiyo-e Collection +#> On display?: no +#> Collection: Prints And Drawings +#> Department: Japanese Art +#> Dimensions: H. 12 1/2 in x W. 5 3/4 in, H. 31.7 cm x W. 14.6 cm +#> Label: 30 —é木–Ø春t信M 『w•—流—¬æ…八"ª景Œi』x 「u松¼•—の‚Ì秋H月ŒŽ」vSuzuki Harunobu, 1725?–1770"Autumn Moon of +#> Matsukaze" (Matsukaze no shū ?)From Fashionable Eight Views +#> of Noh Chants (Fū ?ū ?1768–1769Woodblock print +#> (nishiki-e)Hosoban +#> +#> [[3]] +#> Hunting for fireflies +#> Object id: 2005.100.29 +#> Object name: Woodblock print +#> Date: 1767-1768 +#> Artist: Suzuki HarunobuJapanese, 1724 - 1770 +#> Medium: Ink and colors on paper +#> Credit line: Gift of the Grabhorn Ukiyo-e Collection +#> On display?: no +#> Collection: Prints And Drawings +#> Department: Japanese Art +#> Dimensions: H. 10 1/2 in x W. 8 in, H. 26.7 cm x W. 20.3 cm +#> Label: 34 —é木–Ø春t信M Œu狩Žëり‚èSuzuki Harunobu, 1725?–1770Hunting for +#> fireflies1767–1768Woodblock print ( nishiki-e) Chū ? +``` + +## No search, boo + +Note that there is no search functionality yet for this source. Maybe someone can add that via pull requests :) + +## Like the others + +The others sources in `musemeta` mostly work the same way as the above.
    @@ -185,206 +371,164 @@

    -

The Lagotto application is a Rails app that collects article level metrics data for research objects and serves it up via a RESTful API. So far, this application has only been applied to scholarly articles, but will see action on datasets soon.

    + The Lagotto application is a Rails app that collects and serves up via RESTful API article level metrics data for research objects. So far, this application has only been applied to scholarly articles, but will [see action on datasets soon](http://articlemetrics.github.io/MDC/). + +[Martin Fenner](http://blog.martinfenner.org/) has lead the development of Lagotto. He recently set up [a discussion site](http://discuss.lagotto.io/) if you want to chat about it. + +The application has a [nice GUI interface](http://alm.plos.org/), and a quite nice [RESTful API](http://alm.plos.org/docs/api). + +Lagotto is open source! Because of this, and the quality of the software, other publishers have started using it to gather and deliver publicly article level metrics data, including: + +* [eLife](http://lagotto.svr.elifesciences.org/) +* [Public Knowledge Project (PKP)](http://pkp-alm.lib.sfu.ca/) +* [Copernicus](http://metricus.copernicus.org/) +* [Crossref](http://det.labs.crossref.org/) +* [Pensoft](http://alm.pensoft.net:81/) -

Martin Fenner has led the development of Lagotto. He recently set up a discussion site if you want to chat about it.

    +The PLOS instance at [http://alm.plos.org/](http://alm.plos.org/) is always the most up to date with the Lagotto software, but [Crossref](http://det.labs.crossref.org/) has the largest number of articles. -

The application has a nice GUI, and a quite nice RESTful API.

    +I've been working on three clients for the Lagotto REST API, including for a while now on `R`, recently on `Python`, and just last week on `Ruby`. -

Lagotto is open source! Because of this, and the quality of the software, other publishers have started using it to gather and publicly deliver article level metrics data, including:

    +Please do try the clients, report bugs, request features - you know the open source drill... - +I'd say the R client is the most mature, while Python is less so, end the Ruby gem the least mature. -

    The PLOS instance at http://alm.plos.org/ is always the most up to date with the Lagotto software, but Crossref has the largest number of articles.

    +## Installation -

    I've been working on three clients for the Lagotto REST API, including for a while now on R, recently on Python, and just last week on Ruby.

    +R -

    Please do try the clients, report bugs, request features - you know the open source drill...

    -

I'd say the R client is the most mature, while Python is less so, and the Ruby gem the least mature.

    +```r +install.packages("devtools") +devtools::install_github("ropensci/alm") +``` -

    Installation

    +Python -

    R

    -
    install.packages("devtools")
    -devtools::install_github("ropensci/alm")
    -
    -

    Python

    -
    git clone https://github.com/cameronneylon/pyalm.git
    +
+```sh
    +git clone https://github.com/cameronneylon/pyalm.git
     cd pyalm
     git checkout scott
     python setup.py install
    -
    -

    Ruby

    -
    gem install httparty json rake
    -git clone https://github.com/sckott/alm.git
    -cd alm
    -make # which runs build and install tasks
    -
    -

If you don't have make, then just run gem build alm.gemspec and gem install alm-0.1.0.gem separately.

    - -

    Example

    - -

    In this example, we'll get altmetrics data for two DOIs: 10.1371/journal.pone.0029797, and 10.1371/journal.pone.0029798 (click on links to go to paper).

    - -

    R

    -
    library('alm')
    -ids <- c("10.1371/journal.pone.0029797","10.1371/journal.pone.0029798")
    -alm_ids(ids, info="summary")
    -#> $meta
    -#>   total total_pages page error
    -#> 1     2           1    1    NA
    -#> 
    -#> $data
    -#> $data$`10.1371/journal.pone.0029798`
    -#> $data$`10.1371/journal.pone.0029798`$info
    -#>                            doi
    -#> 1 10.1371/journal.pone.0029798
    -#>                                                                                     title
    -#> 1 Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol
    -#>                                                                canonical_url
    -#> 1 http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798
    -#>       pmid   pmcid                        mendeley_uuid
    -#> 1 22253786 3256171 b08cc99e-b526-3f0c-adaa-d5ee6d0d978a
    -#>            update_date     issued
    -#> 1 2014-12-09T02:52:47Z 2012-01-11
    -#> 
    -#> $data$`10.1371/journal.pone.0029798`$signposts
    -#>                            doi viewed saved discussed cited
    -#> 1 10.1371/journal.pone.0029798   4346    14         2    26
    -#> 
    -#> 
    -#> $data$`10.1371/journal.pone.0029797`
    -#> $data$`10.1371/journal.pone.0029797`$info
    -#>                            doi
    -#> 1 10.1371/journal.pone.0029797
    -#>                                                                             title
    -#> 1 Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate
    -#>                                                                canonical_url
    -#> 1 http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797
    -#>       pmid   pmcid                        mendeley_uuid
    -#> 1 22253785 3256195 897fbbd6-5a23-3552-8077-97251b82c1e1
    -#>            update_date     issued
    -#> 1 2014-12-09T02:52:46Z 2012-01-11
    -#> 
    -#> $data$`10.1371/journal.pone.0029797`$signposts
    -#>                            doi viewed saved discussed cited
    -#> 1 10.1371/journal.pone.0029797  34282    81       244     8
    -
    -

    Python

    -
    import pyalm
    -ids = ["10.1371/journal.pone.0029797","10.1371/journal.pone.0029798"]
    -pyalm.get_alm(ids, info="summary")
    -
    -#> {'articles': [<ArticleALM Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol,
    -#> DOI 10.1371/journal.pone.0029798>,
    -#>   <ArticleALM Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate,
    -#>         DOI 10.1371/journal.pone.0029797>],
    -#>  'meta': {u'error': None, u'page': 1, u'total': 2, u'total_pages': 1}}
    -
    -

    Ruby

    -
    require 'alm'
    -Alm.alm(ids: ["10.1371/journal.pone.0029797","10.1371/journal.pone.0029798"], key: ENV['PLOS_API_KEY'])
    -
    -#> => {"total"=>2,
    -#>  "total_pages"=>1,
    -#>  "page"=>1,
    -#>  "error"=>nil,
    -#>  "data"=>
    -#>   [{"doi"=>"10.1371/journal.pone.0029798",
    -#>     "title"=>"Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol",
    -#>     "issued"=>{"date-parts"=>[[2012, 1, 11]]},
    -#>     "canonical_url"=>"http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798",
    -#>     "pmid"=>"22253786",
    -#>     "pmcid"=>"3256171",
    -#>     "mendeley_uuid"=>"b08cc99e-b526-3f0c-adaa-d5ee6d0d978a",
    -#>     "viewed"=>4346,
    -#>     "saved"=>14,
    -#>     "discussed"=>2,
    -#>     "cited"=>26,
    -#>     "update_date"=>"2014-12-09T02:52:47Z"},
    -#>    {"doi"=>"10.1371/journal.pone.0029797",
    -#>     "title"=>"Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate",
    -#>     "issued"=>{"date-parts"=>[[2012, 1, 11]]},
    -#>     "canonical_url"=>"http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797",
    -#>     "pmid"=>"22253785",
    -#>     "pmcid"=>"3256195",
    -#>     "mendeley_uuid"=>"897fbbd6-5a23-3552-8077-97251b82c1e1",
    -#>     "viewed"=>34282,
    -#>     "saved"=>81,
    -#>     "discussed"=>244,
    -#>     "cited"=>8,
    -#>     "update_date"=>"2014-12-09T02:52:46Z"}]}
    -
    -
    - -
    -

    - - Dealing with multi handle errors - -

    +``` - +Ruby -

At rOpenSci we occasionally hear from our users that they run into an error like:

    -
    Error in function (type, msg, asError = TRUE)  : 
    -  easy handled already used in multi handle
    -
    -

This error occurs in the httr package that we use to make HTTP requests to sources of data on the web. It happens when, e.g., you make a lot of requests to a resource, then it gets interrupted somehow - then you make another call, and you get the error above. Let's try it with httr v0.5:

    -
    library("httr")
    -# run, then esc to cause multi handle error
    -replicate(50, GET("http://google.com/"))
-# then retry single call, which throws multi handle error
    -GET("http://google.com/")
    -#> Error in function (type, msg, asError = TRUE)  : 
    -#>   easy handled already used in multi handle
    -
    -

There are any number of reasons why your session may get interrupted, including an internet outage, the web service you are requesting data from timing out, etc. There hasn't been a straightforward way to handle this, until recently.

    - -

    In httr version 0.6, there are two new functions handle_find() and handle_reset() to help deal with this error.

    - -

First, install the newest httr from GitHub

    -
    install.packages("devtools")
    -devtools::install_github("hadley/httr")
    -
    library("httr")
    -
    -

Make a bunch of requests to Google, interrupting partway through

    -
    replicate(50, HEAD("http://google.com/"))
    -
    -

Then retry a single call, which throws the multi handle error

    -
    HEAD("http://google.com/")
    -#> Error in function (type, msg, asError = TRUE)  : 
    -#>   easy handled already used in multi handle
    -
    -

    Find handle

    -
    handle_find("http://google.com/")
    -#> Host: http://google.com/ <0x10f3d1600>
    -
    -

    Reset handle

    -
    handle_reset("http://google.com/")
    -
    -

Try the call again; this time it should work

    -
    HEAD("http://google.com/")
    -#> Response [http://www.google.com/]
    -#>   Date: 2014-12-08 13:37
    -#>   Status: 200
    -#>   Content-Type: text/html; charset=ISO-8859-1
    -#> <EMPTY BODY>
    -
    -

    Usage in ropensci packages

    - -

We have more work to do yet to integrate this into our packages. It's great that you can reset a handle as above, but to reset the handle you need to know the URL used in the request, and to find it our users would have to dig into the code for the function they are using. That is easy-ish to do, but perhaps not everyone knows they can get to the code easily. So, we may try setting a parameter in functions that would let users reset the handle to clear this error (a rough sketch of the idea is below).

    - -
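A rough sketch of that idea - no such helper exists in our packages yet, and the retry_with_reset() name below is made up for illustration - would be to catch the multi handle error, call handle_reset() on the URL, and retry:

```r
library("httr")

# hypothetical helper: make a request, and if the multi handle error
# shows up, reset the handle for that URL and try once more
retry_with_reset <- function(url) {
  tryCatch(
    HEAD(url),
    error = function(e) {
      if (grepl("multi handle", conditionMessage(e))) {
        handle_reset(url)  # clear the stale handle, as shown above
        HEAD(url)
      } else {
        stop(e)
      }
    }
  )
}

retry_with_reset("http://google.com/")
```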

    Note

    - -

Note that Hadley is planning on eliminating the RCurl dependency (https://github.com/hadley/httr/issues/172), so there may be a different solution in the future.

    + +```r +gem install httparty json rake +git clone https://github.com/sckott/alm.git +cd alm +make # which runs build and install tasks +``` + +If you don't have `make`, then just run `gem build alm.gemspec` and `gem install alm-0.1.0.gem` seperately. + +## Example + +In this example, we'll get altmetrics data for two DOIs: [10.1371/journal.pone.0029797](http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797), and [10.1371/journal.pone.0029798](http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798) (click on links to go to paper). + +### R + + +```r +library('alm') +ids <- c("10.1371/journal.pone.0029797","10.1371/journal.pone.0029798") +alm_ids(ids, info="summary") +#> $meta +#> total total_pages page error +#> 1 2 1 1 NA +#> +#> $data +#> $data$`10.1371/journal.pone.0029798` +#> $data$`10.1371/journal.pone.0029798`$info +#> doi +#> 1 10.1371/journal.pone.0029798 +#> title +#> 1 Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol +#> canonical_url +#> 1 http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798 +#> pmid pmcid mendeley_uuid +#> 1 22253786 3256171 b08cc99e-b526-3f0c-adaa-d5ee6d0d978a +#> update_date issued +#> 1 2014-12-09T02:52:47Z 2012-01-11 +#> +#> $data$`10.1371/journal.pone.0029798`$signposts +#> doi viewed saved discussed cited +#> 1 10.1371/journal.pone.0029798 4346 14 2 26 +#> +#> +#> $data$`10.1371/journal.pone.0029797` +#> $data$`10.1371/journal.pone.0029797`$info +#> doi +#> 1 10.1371/journal.pone.0029797 +#> title +#> 1 Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate +#> canonical_url +#> 1 http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797 +#> pmid pmcid mendeley_uuid +#> 1 22253785 3256195 897fbbd6-5a23-3552-8077-97251b82c1e1 +#> update_date issued +#> 1 2014-12-09T02:52:46Z 2012-01-11 +#> +#> $data$`10.1371/journal.pone.0029797`$signposts +#> doi viewed saved discussed cited +#> 1 10.1371/journal.pone.0029797 34282 81 244 8 +``` + +### Python + + +```r +import pyalm +ids = ["10.1371/journal.pone.0029797","10.1371/journal.pone.0029798"] +pyalm.get_alm(ids, info="summary") + +#> {'articles': [ DOI 10.1371/journal.pone.0029798>, +#> DOI 10.1371/journal.pone.0029797>], +#> 'meta': {u'error': None, u'page': 1, u'total': 2, u'total_pages': 1}} +``` + +### Ruby + + +```r +require 'alm' +Alm.alm(ids: ["10.1371/journal.pone.0029797","10.1371/journal.pone.0029798"], key: ENV['PLOS_API_KEY']) + +#> => {"total"=>2, +#> "total_pages"=>1, +#> "page"=>1, +#> "error"=>nil, +#> "data"=> +#> [{"doi"=>"10.1371/journal.pone.0029798", +#> "title"=>"Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol", +#> "issued"=>{"date-parts"=>[[2012, 1, 11]]}, +#> "canonical_url"=>"http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798", +#> "pmid"=>"22253786", +#> "pmcid"=>"3256171", +#> "mendeley_uuid"=>"b08cc99e-b526-3f0c-adaa-d5ee6d0d978a", +#> "viewed"=>4346, +#> "saved"=>14, +#> "discussed"=>2, +#> "cited"=>26, +#> "update_date"=>"2014-12-09T02:52:47Z"}, +#> {"doi"=>"10.1371/journal.pone.0029797", +#> "title"=>"Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate", +#> "issued"=>{"date-parts"=>[[2012, 1, 11]]}, +#> "canonical_url"=>"http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797", +#> "pmid"=>"22253785", +#> "pmcid"=>"3256195", +#> "mendeley_uuid"=>"897fbbd6-5a23-3552-8077-97251b82c1e1", +#> "viewed"=>34282, +#> "saved"=>81, 
+#> "discussed"=>244, +#> "cited"=>8, +#> "update_date"=>"2014-12-09T02:52:46Z"}]} +```
    diff --git a/_site/page11/index.html b/_site/page11/index.html index 25774d7ddc..4529b5369c 100644 --- a/_site/page11/index.html +++ b/_site/page11/index.html @@ -59,6 +59,105 @@

    Recology

      +
    +

    + + Dealing with multi handle errors + +

    + + + + At rOpenSci we occasssionally hear from our users that they run into an error like: + +```r +Error in function (type, msg, asError = TRUE) : + easy handled already used in multi handle +``` + +This error occurs in the `httr` package that we use to do http requests to sources of data on the web. It happens when e.g., you make a lot of requests to a resource, then it gets interrupted somehow - then you make another call, and you get the error above. Let's try it with the an version of `httr` (`v0.5`): + + +```r +library("httr") +# run, then esc to cause multi handle error +replicate(50, GET("http://google.com/")) +# then retry single call, which trows multi handle error +GET("http://google.com/") +#> Error in function (type, msg, asError = TRUE) : +#> easy handled already used in multi handle +``` + +There are any number of reasons why your session may get interrupted, including an internet outage, the web service you are requesesting data from times out, etc. There hasn't been a straight-forward way to handle this, until recently. + +In `httr` version `0.6`, there are two new functions `handle_find()` and `handle_reset()` to help deal with this error. + +First, install newest httr from Github + + +```r +install.packages("devtools") +devtools::install_github("hadley/httr") +``` + + +```r +library("httr") +``` + +Make a bunch of requests to google, interrupting part way through + + +```r +replicate(50, HEAD("http://google.com/")) +``` + +Then retry single call, which trows multi handle error + + +```r +HEAD("http://google.com/") +#> Error in function (type, msg, asError = TRUE) : +#> easy handled already used in multi handle +``` + +Find handle + + +```r +handle_find("http://google.com/") +#> Host: http://google.com/ <0x10f3d1600> +``` + +Reset handle + + +```r +handle_reset("http://google.com/") +``` + +Try call again, this time it should work + + +```r +HEAD("http://google.com/") +#> Response [http://www.google.com/] +#> Date: 2014-12-08 13:37 +#> Status: 200 +#> Content-Type: text/html; charset=ISO-8859-1 +#> +``` + +## Usage in ropensci packages + +We have more work to do yet to integrate this into our packages. It's great you can reset a handle as above, but to reset the handle you need to search for the URL used in the request, which our users would have to dig into the code for the function they are using. That is easy-ish to do, but perhaps not everyone knows they can get to the code easily. So, we may try seting a parameter in functions that would let reset the handle to clear this error. + +## Note + +Note that Hadley is planning on eliminating `RCurl` dependency (https://github.com/hadley/httr/issues/172), so there may be a different solution in the future. + +
    +

    @@ -68,143 +167,165 @@

    -

The Lagotto application is a Rails app that collects article level metrics data for research objects and serves it up via a RESTful API. So far, this application has only been applied to scholarly articles, but will see action on datasets soon.

    + The Lagotto application is a Rails app that collects and serves up via RESTful API article level metrics data for research objects. So far, this application has only been applied to scholarly articles, but will [see action on datasets soon](http://articlemetrics.github.io/MDC/). + +[Martin Fenner](http://blog.martinfenner.org/) has lead the development of Lagotto. He recently set up [a discussion site](http://discuss.lagotto.io/) if you want to chat about it. + +The application has a [nice GUI interface](http://alm.plos.org/), and a quite nice [RESTful API](http://alm.plos.org/docs/api). + +Lagotto is open source! Because of this, and the quality of the software, other publishers have started using it to gather and deliver publicly article level metrics data, including: -

Martin Fenner has led the development of Lagotto. He recently set up a discussion site if you want to chat about it.

    +* [eLife](http://lagotto.svr.elifesciences.org/) +* [Public Knowledge Project (PKP)](http://pkp-alm.lib.sfu.ca/) +* [Copernicus](http://metricus.copernicus.org/) +* [Crossref](http://det.labs.crossref.org/) +* [Pensoft](http://alm.pensoft.net:81/) -

The application has a nice GUI, and a quite nice RESTful API.

    +The PLOS instance at [http://alm.plos.org/](http://alm.plos.org/) is always the most up to date with the Lagotto software, but [Crossref](http://det.labs.crossref.org/) has the largest number of articles. -

Lagotto is open source! Because of this, and the quality of the software, other publishers have started using it to gather and publicly deliver article level metrics data, including:

    +I've been working on three clients for the Lagotto REST API, including for a while now on `R`, recently on `Python`, and just last week on `Ruby`. - +Please do try the clients, report bugs, request features - you know the open source drill... -

    The PLOS instance at http://alm.plos.org/ is always the most up to date with the Lagotto software, but Crossref has the largest number of articles.

    +I'd say the R client is the most mature, while Python is less so, end the Ruby gem the least mature. -

    I've been working on three clients for the Lagotto REST API, including for a while now on R, recently on Python, and just last week on Ruby.

    +## Installation -

    Please do try the clients, report bugs, request features - you know the open source drill...

    +R -

I'd say the R client is the most mature, while Python is less so, and the Ruby gem the least mature.

    -

    Installation

    +```r +install.packages("devtools") +devtools::install_github("ropensci/alm") +``` -

    R

    -
    install.packages("devtools")
    -devtools::install_github("ropensci/alm")
    -
    -

    Python

    -
    git clone https://github.com/cameronneylon/pyalm.git
    +Python
    +
    +
+```sh
    +git clone https://github.com/cameronneylon/pyalm.git
     cd pyalm
     git checkout scott
     python setup.py install
    -
    -

    Ruby

    -
    gem install httparty json rake
    -git clone https://github.com/sckott/alm.git
    +```
    +
    +Ruby
    +
    +
+```sh
    +gem install httparty json rake
    +git clone https://github.com/sckott/alm.git
     cd alm
    -make # which runs build and install tasks
    -
    -

If you don't have make, then just run gem build alm.gemspec and gem install alm-0.1.0.gem separately.

    - -

    Example

    - -

    In this example, we'll get altmetrics data for two DOIs: 10.1371/journal.pone.0029797, and 10.1371/journal.pone.0029798 (click on links to go to paper).

    - -

    R

    -
    library('alm')
    -ids <- c("10.1371/journal.pone.0029797","10.1371/journal.pone.0029798")
    -alm_ids(ids, info="summary")
    -#> $meta
    -#>   total total_pages page error
    -#> 1     2           1    1    NA
    -#> 
    -#> $data
    -#> $data$`10.1371/journal.pone.0029798`
    -#> $data$`10.1371/journal.pone.0029798`$info
    -#>                            doi
    -#> 1 10.1371/journal.pone.0029798
    -#>                                                                                     title
    -#> 1 Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol
    -#>                                                                canonical_url
    -#> 1 http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798
    -#>       pmid   pmcid                        mendeley_uuid
    -#> 1 22253786 3256171 b08cc99e-b526-3f0c-adaa-d5ee6d0d978a
    -#>            update_date     issued
    -#> 1 2014-12-09T02:52:47Z 2012-01-11
    -#> 
    -#> $data$`10.1371/journal.pone.0029798`$signposts
    -#>                            doi viewed saved discussed cited
    -#> 1 10.1371/journal.pone.0029798   4346    14         2    26
    -#> 
    -#> 
    -#> $data$`10.1371/journal.pone.0029797`
    -#> $data$`10.1371/journal.pone.0029797`$info
    -#>                            doi
    -#> 1 10.1371/journal.pone.0029797
    -#>                                                                             title
    -#> 1 Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate
    -#>                                                                canonical_url
    -#> 1 http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797
    -#>       pmid   pmcid                        mendeley_uuid
    -#> 1 22253785 3256195 897fbbd6-5a23-3552-8077-97251b82c1e1
    -#>            update_date     issued
    -#> 1 2014-12-09T02:52:46Z 2012-01-11
    -#> 
    -#> $data$`10.1371/journal.pone.0029797`$signposts
    -#>                            doi viewed saved discussed cited
    -#> 1 10.1371/journal.pone.0029797  34282    81       244     8
    -
    -

    Python

    -
    import pyalm
    -ids = ["10.1371/journal.pone.0029797","10.1371/journal.pone.0029798"]
    -pyalm.get_alm(ids, info="summary")
    -
    -#> {'articles': [<ArticleALM Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol,
    -#> DOI 10.1371/journal.pone.0029798>,
    -#>   <ArticleALM Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate,
    -#>         DOI 10.1371/journal.pone.0029797>],
    -#>  'meta': {u'error': None, u'page': 1, u'total': 2, u'total_pages': 1}}
    -
    -

    Ruby

    -
    require 'alm'
    -Alm.alm(ids: ["10.1371/journal.pone.0029797","10.1371/journal.pone.0029798"], key: ENV['PLOS_API_KEY'])
    -
    -#> => {"total"=>2,
    -#>  "total_pages"=>1,
    -#>  "page"=>1,
    -#>  "error"=>nil,
    -#>  "data"=>
    -#>   [{"doi"=>"10.1371/journal.pone.0029798",
    -#>     "title"=>"Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol",
    -#>     "issued"=>{"date-parts"=>[[2012, 1, 11]]},
    -#>     "canonical_url"=>"http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798",
    -#>     "pmid"=>"22253786",
    -#>     "pmcid"=>"3256171",
    -#>     "mendeley_uuid"=>"b08cc99e-b526-3f0c-adaa-d5ee6d0d978a",
    -#>     "viewed"=>4346,
    -#>     "saved"=>14,
    -#>     "discussed"=>2,
    -#>     "cited"=>26,
    -#>     "update_date"=>"2014-12-09T02:52:47Z"},
    -#>    {"doi"=>"10.1371/journal.pone.0029797",
    -#>     "title"=>"Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate",
    -#>     "issued"=>{"date-parts"=>[[2012, 1, 11]]},
    -#>     "canonical_url"=>"http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797",
    -#>     "pmid"=>"22253785",
    -#>     "pmcid"=>"3256195",
    -#>     "mendeley_uuid"=>"897fbbd6-5a23-3552-8077-97251b82c1e1",
    -#>     "viewed"=>34282,
    -#>     "saved"=>81,
    -#>     "discussed"=>244,
    -#>     "cited"=>8,
    -#>     "update_date"=>"2014-12-09T02:52:46Z"}]}
    -
    +make # which runs build and install tasks +``` + +If you don't have `make`, then just run `gem build alm.gemspec` and `gem install alm-0.1.0.gem` seperately. + +## Example + +In this example, we'll get altmetrics data for two DOIs: [10.1371/journal.pone.0029797](http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797), and [10.1371/journal.pone.0029798](http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798) (click on links to go to paper). + +### R + + +```r +library('alm') +ids <- c("10.1371/journal.pone.0029797","10.1371/journal.pone.0029798") +alm_ids(ids, info="summary") +#> $meta +#> total total_pages page error +#> 1 2 1 1 NA +#> +#> $data +#> $data$`10.1371/journal.pone.0029798` +#> $data$`10.1371/journal.pone.0029798`$info +#> doi +#> 1 10.1371/journal.pone.0029798 +#> title +#> 1 Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol +#> canonical_url +#> 1 http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798 +#> pmid pmcid mendeley_uuid +#> 1 22253786 3256171 b08cc99e-b526-3f0c-adaa-d5ee6d0d978a +#> update_date issued +#> 1 2014-12-09T02:52:47Z 2012-01-11 +#> +#> $data$`10.1371/journal.pone.0029798`$signposts +#> doi viewed saved discussed cited +#> 1 10.1371/journal.pone.0029798 4346 14 2 26 +#> +#> +#> $data$`10.1371/journal.pone.0029797` +#> $data$`10.1371/journal.pone.0029797`$info +#> doi +#> 1 10.1371/journal.pone.0029797 +#> title +#> 1 Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate +#> canonical_url +#> 1 http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797 +#> pmid pmcid mendeley_uuid +#> 1 22253785 3256195 897fbbd6-5a23-3552-8077-97251b82c1e1 +#> update_date issued +#> 1 2014-12-09T02:52:46Z 2012-01-11 +#> +#> $data$`10.1371/journal.pone.0029797`$signposts +#> doi viewed saved discussed cited +#> 1 10.1371/journal.pone.0029797 34282 81 244 8 +``` + +### Python + + +```r +import pyalm +ids = ["10.1371/journal.pone.0029797","10.1371/journal.pone.0029798"] +pyalm.get_alm(ids, info="summary") + +#> {'articles': [ DOI 10.1371/journal.pone.0029798>, +#> DOI 10.1371/journal.pone.0029797>], +#> 'meta': {u'error': None, u'page': 1, u'total': 2, u'total_pages': 1}} +``` + +### Ruby + + +```r +require 'alm' +Alm.alm(ids: ["10.1371/journal.pone.0029797","10.1371/journal.pone.0029798"], key: ENV['PLOS_API_KEY']) + +#> => {"total"=>2, +#> "total_pages"=>1, +#> "page"=>1, +#> "error"=>nil, +#> "data"=> +#> [{"doi"=>"10.1371/journal.pone.0029798", +#> "title"=>"Mitochondrial Electron Transport Is the Cellular Target of the Oncology Drug Elesclomol", +#> "issued"=>{"date-parts"=>[[2012, 1, 11]]}, +#> "canonical_url"=>"http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029798", +#> "pmid"=>"22253786", +#> "pmcid"=>"3256171", +#> "mendeley_uuid"=>"b08cc99e-b526-3f0c-adaa-d5ee6d0d978a", +#> "viewed"=>4346, +#> "saved"=>14, +#> "discussed"=>2, +#> "cited"=>26, +#> "update_date"=>"2014-12-09T02:52:47Z"}, +#> {"doi"=>"10.1371/journal.pone.0029797", +#> "title"=>"Ecological Guild Evolution and the Discovery of the World's Smallest Vertebrate", +#> "issued"=>{"date-parts"=>[[2012, 1, 11]]}, +#> "canonical_url"=>"http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0029797", +#> "pmid"=>"22253785", +#> "pmcid"=>"3256195", +#> "mendeley_uuid"=>"897fbbd6-5a23-3552-8077-97251b82c1e1", +#> "viewed"=>34282, +#> "saved"=>81, +#> "discussed"=>244, +#> "cited"=>8, +#> "update_date"=>"2014-12-09T02:52:46Z"}]} +``` +
    @@ -216,320 +337,145 @@

    -

    I just missed another chat on the rOpenSci website:

    - -
    -

    I want to know the number of publications by people from a certain country, but I dont know how to achieve this...

    -
    - -

Fun! Let's do that. It's a bit complicated because there is no field for the geography of the authors. But there are affiliation fields, from which we can collect the data we need.

    - -

    Installation

    - -

You'll need the GitHub version for the country names data, or just use the CRAN version, and get country names elsewhere.

    -
    install.packages("devtools")
    -devtools::install_github("ropensci/rplos")
    -
    library("rplos")
    -
    -

    Get the data

    -
    articles <- searchplos(q='*:*', limit = 5,
    -    fl=c("id","author_affiliate"), 
    -    fq=list('article_type:"Research Article"', "doc_type:full"))
    -
    -

Search for country names in the affiliation field

    -
    (countries <- lapply(articles$data$author_affiliate, function(x){
    -  out <- sapply(isocodes$name, function(z) grepl(z, x))
    -  isocodes$name[out]
    -}))
    -#> [[1]]
    -#> character(0)
    -#> 
    -#> [[2]]
    -#> [1] "Jersey"        "United States"
    -#> 
    -#> [[3]]
    -#> [1] "China"   "Germany"
    -#> 
    -#> [[4]]
    -#> character(0)
    -#> 
    -#> [[5]]
    -#> [1] "Argentina"      "United Kingdom"
    -
    -

    You can combine this data with the previously collected data:

    -
    # Helper function
    -splitem <- function(x){
    -  if(length(x) == 0) { NA } else {
    -    if(length(x) > 1) paste0(x, collapse = ", ") else x
    -  }
    -}
    -
    -articles$data$countries <- sapply(countries, splitem)
    -head(articles$data)
    -#>                             id
    -#> 1 10.1371/journal.pone.0095870
    -#> 2 10.1371/journal.pone.0110535
    -#> 3 10.1371/journal.pone.0110991
    -#> 4 10.1371/journal.pone.0111234
    -#> 5 10.1371/journal.pone.0111388
    -#>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                author_affiliate
    -#> 1 Institute of Epidemiology and Preventive Medicine, College of Public Health, National Taiwan University, Taipei, Taiwan; Department of Clinical Laboratory Sciences and Medical Biotechnology, College of Medicine, National Taiwan University, Taipei, Taiwan; Department of Gastroenterology, Ren-Ai Branch, Taipei City Hospital, Taipei, Taiwan; Division of Gastroenterology, Department of Internal Medicine, National Taiwan University Hospital and National Taiwan University College of Medicine, Taipei, Taiwan; Liver Research Unit, Chang Gung Memorial Hospital, Chang Gung University College of Medicine, Taipei, Taiwan; Division of Gastroenterology, Department of Medicine, Taipei Veterans General Hospital, Taipei, Taiwan; Cheng Hsin General Hospital, Taipei, Taiwan
    -#> 2    Durham Nephrology Associates, Durham, North Carolina, United States of America; Scientific Activities Department, The National Kidney Foundation, Inc., New York, New York, United States of America; Covance Inc., Princeton, New Jersey, United States of America; Departments of Medicine and Population Health Sciences, University of Wisconsin School of Medicine and Public Health, Madison, Wisconsin, United States of America; Department of Family Medicine, University at Buffalo, Buffalo, New York, United States of America; Baylor Health Care System, Baylor Heart and Vascular Institute, Dallas, Texas, United States of America; Department of Medicine, Division of Nephrology, Icahn School of Medicine at Mount Sinai, New York, New York, United States of America
    -#> 3                                                                                                                                                                                                                                                                                                                                                                                                                                  State Key Laboratory of Electronic Thin Films and Integrated Devices, School of Microelectronics and Solid-State electronics, University of Electronic Science and Technology of China, Sichuan, China; Electrical and Computer Engineering, Kaiserslautern University of Technology, Kaiserslautern German Gottlieb-Daimler-Strabe, Kaiserslautern, Germany
    -#> 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         SB RAS Institute of Chemical Biology and Fundamental Medicine, Novosibirsk, Russia; Pacific Institute of Bioorganic Chemistry, Far East Division, Russian Academy of Sciences, Vladivostok, Russia; Novosibirsk State University, Novosibirsk, Russia
    -#> 5                                                                                                                                                                                                                                                                                                                                                                                   CONICET, Consejo Nacional de Investigaciones Científicas y Técnicas, Ciudad Autónoma de Buenos Aires, Buenos Aires, Argentina; INGEO, Instituto de Geología, Facultad de Ciencias Exactas, Físicas y Naturales, Universidad Nacional de San Juan, San Juan, San Juan, Argentina; School of Geography, Earth and Environmental Sciences, University of Birmingham, Birmingham, West Midlands, United Kingdom
    -#>                   countries
    -#> 1                      <NA>
    -#> 2     Jersey, United States
    -#> 3            China, Germany
    -#> 4                      <NA>
    -#> 5 Argentina, United Kingdom
    -
    -

    Bigger data set

    - -

Okay, cool, let's do it on a bigger data set, and this time, we'll get another variable counter_total_all, which is the combination of page views/pdf downloads for each article. This will allow us to ask: Is the number of countries represented among the authors related to page views? I have no idea if this question makes sense, but nonetheless, it is a question :)

    -
    articles <- searchplos(q='*:*', limit = 1000,
    -    fl=c("id","counter_total_all","author_affiliate"), 
    -    fq=list('article_type:"Research Article"', "doc_type:full"))
    -#> 1 
    -#> 2
    -
    -

    Get countries

    -
    countries <- lapply(articles$data$author_affiliate, function(x){
    -  out <- sapply(isocodes$name, function(z) grepl(z, x))
    -  isocodes$name[out]
    -})
    -df <- articles$data
    -df$countries <- sapply(countries, splitem)
    -
    -

Let's remove those rows with 0 countries; the authors must be from somewhere, so the country name matching must have failed for those.

    -
    df$n_countries <- sapply(countries, length)
    -df <- df[ df$n_countries > 0, ]
    -
    -

    Plot data

    -
    library("ggplot2")
    -ggplot(df, aes(n_countries, as.numeric(counter_total_all))) +
    -  geom_point() +
    -  labs(y="total page views") + 
    -  theme_grey(base_size = 16)
    -
    -

    plot of chunk unnamed-chunk-10

    - -

    Conclusion: meh, maybe, maybe not
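If you want to go a step beyond eyeballing the plot, a quick rank correlation on the variables built above is easy enough. This is just a sketch using base R's cor.test(); whether a monotonic association is even the right thing to look for here is another question.

```r
# rough sanity check: are n_countries and page views related at all?
# Spearman rank correlation, so we don't assume linearity
cor.test(df$n_countries, as.numeric(df$counter_total_all),
         method = "spearman")
```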

    - -

    Into rplos

    - -

    We'll probably add a function like this into rplos, as a convenient way to handle this use case.
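To give a flavor of what that might look like, here is a minimal sketch wrapping the steps above into one call. The name plos_countries() and its interface are made up for illustration; it is not an actual rplos function. It reuses searchplos(), isocodes, and the splitem() helper from earlier in this post.

```r
# hypothetical helper (not part of rplos) wrapping the steps above:
# fetch affiliations, match country names via isocodes, return a data.frame
plos_countries <- function(q = "*:*", limit = 10) {
  res <- searchplos(q = q, limit = limit,
                    fl = c("id", "author_affiliate"),
                    fq = list('article_type:"Research Article"',
                              "doc_type:full"))
  countries <- lapply(res$data$author_affiliate, function(x) {
    isocodes$name[sapply(isocodes$name, function(z) grepl(z, x))]
  })
  res$data$countries <- sapply(countries, splitem)
  res$data
}

head(plos_countries(limit = 5))
```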

    + I just missed another chat on the rOpenSci website: -
    - -
    -

    - - http codes - -

    +> I want to know the number of publications by people from a certain country, but I dont know how to achieve this... + +Fun! Let's do that. It's a bit complicated because there is no field like geography of the authors. But there are affiliation fields, from which we can collect data we need. + +## Installation + +You'll need the GitHub version for the coutry names data, or just use the CRAN version, and get country names elsewhere. + + +```r +install.packages("devtools") +devtools::install_github("ropensci/rplos") +``` + + +```r +library("rplos") +``` + +## Get the data + + +```r +articles <- searchplos(q='*:*', limit = 5, + fl=c("id","author_affiliate"), + fq=list('article_type:"Research Article"', "doc_type:full")) +``` + +## Search for country names in affilitation field + + +```r +(countries <- lapply(articles$data$author_affiliate, function(x){ + out <- sapply(isocodes$name, function(z) grepl(z, x)) + isocodes$name[out] +})) +#> [[1]] +#> character(0) +#> +#> [[2]] +#> [1] "Jersey" "United States" +#> +#> [[3]] +#> [1] "China" "Germany" +#> +#> [[4]] +#> character(0) +#> +#> [[5]] +#> [1] "Argentina" "United Kingdom" +``` + +You can combine this data with the previously collected data: + + +```r +# Helper function +splitem <- function(x){ + if(length(x) == 0) { NA } else { + if(length(x) > 1) paste0(x, collapse = ", ") else x + } +} + +articles$data$countries <- sapply(countries, splitem) +head(articles$data) +#> id +#> 1 10.1371/journal.pone.0095870 +#> 2 10.1371/journal.pone.0110535 +#> 3 10.1371/journal.pone.0110991 +#> 4 10.1371/journal.pone.0111234 +#> 5 10.1371/journal.pone.0111388 +#> author_affiliate +#> 1 Institute of Epidemiology and Preventive Medicine, College of Public Health, National Taiwan University, Taipei, Taiwan; Department of Clinical Laboratory Sciences and Medical Biotechnology, College of Medicine, National Taiwan University, Taipei, Taiwan; Department of Gastroenterology, Ren-Ai Branch, Taipei City Hospital, Taipei, Taiwan; Division of Gastroenterology, Department of Internal Medicine, National Taiwan University Hospital and National Taiwan University College of Medicine, Taipei, Taiwan; Liver Research Unit, Chang Gung Memorial Hospital, Chang Gung University College of Medicine, Taipei, Taiwan; Division of Gastroenterology, Department of Medicine, Taipei Veterans General Hospital, Taipei, Taiwan; Cheng Hsin General Hospital, Taipei, Taiwan +#> 2 Durham Nephrology Associates, Durham, North Carolina, United States of America; Scientific Activities Department, The National Kidney Foundation, Inc., New York, New York, United States of America; Covance Inc., Princeton, New Jersey, United States of America; Departments of Medicine and Population Health Sciences, University of Wisconsin School of Medicine and Public Health, Madison, Wisconsin, United States of America; Department of Family Medicine, University at Buffalo, Buffalo, New York, United States of America; Baylor Health Care System, Baylor Heart and Vascular Institute, Dallas, Texas, United States of America; Department of Medicine, Division of Nephrology, Icahn School of Medicine at Mount Sinai, New York, New York, United States of America +#> 3 State Key Laboratory of Electronic Thin Films and Integrated Devices, School of Microelectronics and Solid-State electronics, University of Electronic Science and Technology of China, Sichuan, China; Electrical and Computer Engineering, Kaiserslautern University of Technology, Kaiserslautern German Gottlieb-Daimler-Strabe, Kaiserslautern, Germany +#> 4 SB 
RAS Institute of Chemical Biology and Fundamental Medicine, Novosibirsk, Russia; Pacific Institute of Bioorganic Chemistry, Far East Division, Russian Academy of Sciences, Vladivostok, Russia; Novosibirsk State University, Novosibirsk, Russia +#> 5 CONICET, Consejo Nacional de Investigaciones Científicas y Técnicas, Ciudad Autónoma de Buenos Aires, Buenos Aires, Argentina; INGEO, Instituto de Geología, Facultad de Ciencias Exactas, Físicas y Naturales, Universidad Nacional de San Juan, San Juan, San Juan, Argentina; School of Geography, Earth and Environmental Sciences, University of Birmingham, Birmingham, West Midlands, United Kingdom +#> countries +#> 1 +#> 2 Jersey, United States +#> 3 China, Germany +#> 4 +#> 5 Argentina, United Kingdom +``` + +## Bigger data set + +Okay, cool, lets do it on a bigger data set, and this time, we'll get another variable `counter_total_all`, which is the combination of page views/pdf downloads for each article. This will allow us to ask _Is number of countries included in the authors related to page views?_. I have no idea if this question makes sense, but nonetheless, it is a question :) + + +```r +articles <- searchplos(q='*:*', limit = 1000, + fl=c("id","counter_total_all","author_affiliate"), + fq=list('article_type:"Research Article"', "doc_type:full")) +#> 1 +#> 2 +``` + +Get countries + + +```r +countries <- lapply(articles$data$author_affiliate, function(x){ + out <- sapply(isocodes$name, function(z) grepl(z, x)) + isocodes$name[out] +}) +df <- articles$data +df$countries <- sapply(countries, splitem) +``` + +Let's remove those rows with 0 countries, since the authors must be from somewhere, so the country name matching must have errored. + + +```r +df$n_countries <- sapply(countries, length) +df <- df[ df$n_countries > 0, ] +``` + +Plot data + + +```r +library("ggplot2") +ggplot(df, aes(n_countries, as.numeric(counter_total_all))) + + geom_point() + + labs(y="total page views") + + theme_grey(base_size = 16) +``` + +![plot of chunk unnamed-chunk-10](figure/unnamed-chunk-10-1.png) + +Conclusion: meh, maybe, maybe not + +## Into rplos + +We'll probably add a function like this into `rplos`, as a convenient way to handle this use case. - - -

Recently noticed a little Python library called httpcode that does a simple thing: gives information on http codes in the CLI. I thought this could be useful in R too, so I made an R version.

    - -

    Installation

    -
    devtools::install_github("sckott/httpcode")
    -
    library("httpcode")
    -
    -

    Search by http code

    -
    http_code(100)
    -#> <Status code: 100>
    -#>   Message: Continue
    -#>   Explanation: Request received, please continue
    -
    http_code(400)
    -#> <Status code: 400>
    -#>   Message: Bad Request
    -#>   Explanation: Bad request syntax or unsupported method
    -
    http_code(503)
    -#> <Status code: 503>
    -#>   Message: Service Unavailable
    -#>   Explanation: The server cannot process the request due to a high load
    -
    http_code(999)
    -#> Error: No description found for code: 999
    -
    -

    Fuzzy code search

    -
    http_code('1xx')
    -#> [[1]]
    -#> <Status code: 100>
    -#>   Message: Continue
    -#>   Explanation: Request received, please continue
    -#> 
    -#> [[2]]
    -#> <Status code: 101>
    -#>   Message: Switching Protocols
    -#>   Explanation: Switching to new protocol; obey Upgrade header
    -#> 
    -#> [[3]]
    -#> <Status code: 102>
    -#>   Message: Processing
    -#>   Explanation: WebDAV; RFC 2518
    -
    http_code('3xx')
    -#> [[1]]
    -#> <Status code: 300>
    -#>   Message: Multiple Choices
    -#>   Explanation: Object has several resources -- see URI list
    -#> 
    -#> [[2]]
    -#> <Status code: 301>
    -#>   Message: Moved Permanently
    -#>   Explanation: Object moved permanently -- see URI list
    -#> 
    -#> [[3]]
    -#> <Status code: 302>
    -#>   Message: Found
    -#>   Explanation: Object moved temporarily -- see URI list
    -#> 
    -#> [[4]]
    -#> <Status code: 303>
    -#>   Message: See Other
    -#>   Explanation: Object moved -- see Method and URL list
    -#> 
    -#> [[5]]
    -#> <Status code: 304>
    -#>   Message: Not Modified
    -#>   Explanation: Document has not changed since given time
    -#> 
    -#> [[6]]
    -#> <Status code: 305>
    -#>   Message: Use Proxy
    -#>   Explanation: You must use proxy specified in Location to access this resource.
    -#> 
    -#> [[7]]
    -#> <Status code: 306>
    -#>   Message: Switch Proxy
    -#>   Explanation: Subsequent requests should use the specified proxy
    -#> 
    -#> [[8]]
    -#> <Status code: 307>
    -#>   Message: Temporary Redirect
    -#>   Explanation: Object moved temporarily -- see URI list
    -#> 
    -#> [[9]]
    -#> <Status code: 308>
    -#>   Message: Permanent Redirect
    -#>   Explanation: Object moved permanently
    -
    http_code('30[12]')
    -#> [[1]]
    -#> <Status code: 301>
    -#>   Message: Moved Permanently
    -#>   Explanation: Object moved permanently -- see URI list
    -#> 
    -#> [[2]]
    -#> <Status code: 302>
    -#>   Message: Found
    -#>   Explanation: Object moved temporarily -- see URI list
    -
    http_code('30[34]')
    -#> [[1]]
    -#> <Status code: 303>
    -#>   Message: See Other
    -#>   Explanation: Object moved -- see Method and URL list
    -#> 
    -#> [[2]]
    -#> <Status code: 304>
    -#>   Message: Not Modified
    -#>   Explanation: Document has not changed since given time
    -
    -

    Search by text message

    -
    http_search("request")
    -#> [[1]]
    -#> <Status code: 100>
    -#>   Message: Continue
    -#>   Explanation: Request received, please continue
    -#> 
    -#> [[2]]
    -#> <Status code: 200>
    -#>   Message: OK
    -#>   Explanation: Request fulfilled, document follows
    -#> 
    -#> [[3]]
    -#> <Status code: 202>
    -#>   Message: Accepted
    -#>   Explanation: Request accepted, processing continues off-line
    -#> 
    -#> [[4]]
    -#> <Status code: 203>
    -#>   Message: Non-Authoritative Information
    -#>   Explanation: Request fulfilled from cache
    -#> 
    -#> [[5]]
    -#> <Status code: 204>
    -#>   Message: No Content
    -#>   Explanation: Request fulfilled, nothing follows
    -#> 
    -#> [[6]]
    -#> <Status code: 306>
    -#>   Message: Switch Proxy
    -#>   Explanation: Subsequent requests should use the specified proxy
    -#> 
    -#> [[7]]
    -#> <Status code: 400>
    -#>   Message: Bad Request
    -#>   Explanation: Bad request syntax or unsupported method
    -#> 
    -#> [[8]]
    -#> <Status code: 403>
    -#>   Message: Forbidden
    -#>   Explanation: Request forbidden -- authorization will not help
    -#> 
    -#> [[9]]
    -#> <Status code: 408>
    -#>   Message: Request Timeout
    -#>   Explanation: Request timed out; try again later.
    -#> 
    -#> [[10]]
    -#> <Status code: 409>
    -#>   Message: Conflict
    -#>   Explanation: Request conflict.
    -#> 
    -#> [[11]]
    -#> <Status code: 413>
    -#>   Message: Request Entity Too Large
    -#>   Explanation: Entity is too large.
    -#> 
    -#> [[12]]
    -#> <Status code: 414>
    -#>   Message: Request-URI Too Long
    -#>   Explanation: URI is too long.
    -#> 
    -#> [[13]]
    -#> <Status code: 416>
    -#>   Message: Requested Range Not Satisfiable
    -#>   Explanation: Cannot satisfy request range.
    -#> 
    -#> [[14]]
    -#> <Status code: 503>
    -#>   Message: Service Unavailable
    -#>   Explanation: The server cannot process the request due to a high load
    -#> 
    -#> [[15]]
    -#> <Status code: 505>
    -#>   Message: HTTP Version Not Supported
    -#>   Explanation: Cannot fulfill request.
    -
    http_search("forbidden")
    -#> [[1]]
    -#> <Status code: 403>
    -#>   Message: Forbidden
    -#>   Explanation: Request forbidden -- authorization will not help
    -
    http_search("too")
    -#> [[1]]
    -#> <Status code: 413>
    -#>   Message: Request Entity Too Large
    -#>   Explanation: Entity is too large.
    -#> 
    -#> [[2]]
    -#> <Status code: 414>
    -#>   Message: Request-URI Too Long
    -#>   Explanation: URI is too long.
    -
    http_search("birds")
    -#> Error: No status code found for search: : birds
    -
    diff --git a/_site/page12/index.html b/_site/page12/index.html index 111977a2ac..798ef160e1 100644 --- a/_site/page12/index.html +++ b/_site/page12/index.html @@ -61,739 +61,733 @@

    Recology

    - - taxize workflows + + http codes

    -

    A missed chat on the rOpenSci website the other day asked:

    - -
    -

    Hi there, i am trying to use the taxize package and have a .csv file of species names to run through taxize updating them. What would be the code i would need to run to achieve this?

    -
    - -

    One way to answer this is to talk about the basic approach to importing data, doing stuff to the data, then recombining data. There are many ways to do this, but I'll go over a few of them.

    - -

    Install taxize

    -
    install.packages("taxize")
    -install.packages("downloader")
    -
    library("taxize")
    -
    -

    Import data

    - -

    We'll use Winston Chang's new downloader package to avoid problems with https, and get a dataset from our ropensci datasets repo https://github.com/ropensci/datasets

    -
    downloader::download("https://raw.githubusercontent.com/ropensci/datasets/master/planttraits/morphological.csv", "morphological.csv")
    -dat <- read.csv("morphological.csv", stringsAsFactors = FALSE)
    -head(dat)
    -#>                  species log_SLA leaf_water_content log_wood_density
    -#> 1         Abies concolor    3.46               0.51            -0.52
    -#> 2          Abies grandis    3.58               0.49            -0.51
    -#> 3        Abies magnifica    3.87               0.62            -0.53
    -#> 4      Acacia farnesiana      NA                 NA               NA
    -#> 5           Acer glabrum    5.07               0.69            -0.54
    -#> 6 Adenostoma fasciculata    3.56               0.46            -0.31
    -#>   log_ht log_N
    -#> 1   7.72  0.02
    -#> 2   7.51 -0.31
    -#> 3   7.58 -0.14
    -#> 4   5.70    NA
    -#> 5   3.25  1.02
    -#> 6   5.33  0.29
    -
    -

    After importing data, there are a variety of approaches you could take:

    - -
      -
1. Vector: Take the species names as a vector from your data.frame, clean them, then re-attach them to the data.frame later, or
2. In-Place: Use for loops or lapply family functions to iterate over each name while simultaneously re-inserting into the data.frame
    - -

    1. Vector

    - -

    Make a vector of names

    -
    splist <- dat$species
    -
    -

Then proceed to do name cleaning, e.g., we can use the tnrs function to see if any names are potentially not spelled correctly.

    -
    tnrs_out <- tnrs(splist, source = "iPlant_TNRS")
    -head(tnrs_out)
    -#>              submittedname             acceptedname    sourceid score
    -#> 1     Ceanothus prostratus     Ceanothus prostratus iPlant_TNRS     1
    -#> 2          Abies magnifica          Abies magnifica iPlant_TNRS     1
    -#> 3 Arctostaphylos canescens Arctostaphylos canescens iPlant_TNRS     1
    -#> 4         Berberis nervosa         Berberis nervosa iPlant_TNRS     1
    -#> 5        Arbutus menziesii        Arbutus menziesii iPlant_TNRS     1
    -#> 6     Calocedrus decurrens     Calocedrus decurrens iPlant_TNRS     1
    -#>                matchedname      authority
    -#> 1     Ceanothus prostratus         Benth.
    -#> 2          Abies magnifica  A. Murray bis
    -#> 3 Arctostaphylos canescens         Eastw.
    -#> 4         Berberis nervosa          Pursh
    -#> 5        Arbutus menziesii          Pursh
    -#> 6     Calocedrus decurrens (Torr.) Florin
    -#>                                     uri
    -#> 1 http://www.tropicos.org/Name/27500276
    -#> 2 http://www.tropicos.org/Name/24900142
    -#> 3 http://www.tropicos.org/Name/12302547
    -#> 4  http://www.tropicos.org/Name/3500175
    -#> 5 http://www.tropicos.org/Name/12302436
    -#> 6  http://www.tropicos.org/Name/9400069
    -
    -

Those with a score of less than 1 may have misspellings.

    -
    tnrs_out[ tnrs_out$score < 1, ]
    -#>                 submittedname              acceptedname    sourceid score
    -#> 23     Adenostoma fasciculata   Adenostoma fasciculatum iPlant_TNRS  0.97
    -#> 24 Arctostaphylos glandulosus Arctostaphylos glandulosa iPlant_TNRS  0.97
    -#> 36        Chamaebatia foliosa     Chamaebatia foliolosa iPlant_TNRS  0.95
    -#> 38     Juniperus californicus     Juniperus californica iPlant_TNRS  0.97
    -#> 77         Prunus illicifolia         Prunus ilicifolia iPlant_TNRS  0.99
    -#> 78         Prunus subcordatus         Prunus subcordata iPlant_TNRS  0.97
    -#>                  matchedname                         authority
    -#> 23   Adenostoma fasciculatum                      Hook. & Arn.
    -#> 24 Arctostaphylos glandulosa                            Eastw.
    -#> 36     Chamaebatia foliolosa                            Benth.
    -#> 38     Juniperus californica                          Carrière
    -#> 77         Prunus ilicifolia (Nutt. ex Hook. & Arn.) D. Dietr.
    -#> 78         Prunus subcordata                            Benth.
    -#>                                      uri
    -#> 23 http://www.tropicos.org/Name/27801458
    -#> 24 http://www.tropicos.org/Name/12300542
    -#> 36 http://www.tropicos.org/Name/27801486
    -#> 38  http://www.tropicos.org/Name/9400374
    -#> 77 http://www.tropicos.org/Name/27801102
    -#> 78 http://www.tropicos.org/Name/27801124
    -
    -

So let's take the acceptedname column as the new names and assign it to a new vector

    -
    cleaned_names <- tnrs_out$acceptedname
    -
    -

Then join the names back, either replacing the originals or adding them as a new column

    - -

    Replace

    -
    dat$species <- cleaned_names
    -head(dat)
    -#>                    species log_SLA leaf_water_content log_wood_density
    -#> 1     Ceanothus prostratus    3.46               0.51            -0.52
    -#> 2          Abies magnifica    3.58               0.49            -0.51
    -#> 3 Arctostaphylos canescens    3.87               0.62            -0.53
    -#> 4         Berberis nervosa      NA                 NA               NA
    -#> 5        Arbutus menziesii    5.07               0.69            -0.54
    -#> 6     Calocedrus decurrens    3.56               0.46            -0.31
    -#>   log_ht log_N
    -#> 1   7.72  0.02
    -#> 2   7.51 -0.31
    -#> 3   7.58 -0.14
    -#> 4   5.70    NA
    -#> 5   3.25  1.02
    -#> 6   5.33  0.29
    -
    -

    New column

    -
    dat$species_cleaned <- cleaned_names
    -head(dat)
    -#>                    species log_SLA leaf_water_content log_wood_density
    -#> 1     Ceanothus prostratus    3.46               0.51            -0.52
    -#> 2          Abies magnifica    3.58               0.49            -0.51
    -#> 3 Arctostaphylos canescens    3.87               0.62            -0.53
    -#> 4         Berberis nervosa      NA                 NA               NA
    -#> 5        Arbutus menziesii    5.07               0.69            -0.54
    -#> 6     Calocedrus decurrens    3.56               0.46            -0.31
    -#>   log_ht log_N          species_cleaned
    -#> 1   7.72  0.02     Ceanothus prostratus
    -#> 2   7.51 -0.31          Abies magnifica
    -#> 3   7.58 -0.14 Arctostaphylos canescens
    -#> 4   5.70    NA         Berberis nervosa
    -#> 5   3.25  1.02        Arbutus menziesii
    -#> 6   5.33  0.29     Calocedrus decurrens
    -
    -

    2. In-place

    - -

You can use functions from the dplyr package to split-apply-combine: split your vector apart into one piece per taxon, apply a function or functions to do name cleaning, then combine the pieces back together.

    - -

    Here, we'll attach taxonomic ids from the Catalogue of Life to each species (each row) (with just a subset of the data to save time):

    -
    library("dplyr")
    -tbl_df(dat)[1:5,] %>%
    -  rowwise() %>%
    -  mutate(colid = get_colid(species)) %>%
    -  select(species, colid)
    -#> Source: local data frame [5 x 2]
    -#> Groups: <by row>
    -#> 
    -#>                    species    colid
    -#> 1     Ceanothus prostratus 19544732
    -#> 2          Abies magnifica 18158318
    -#> 3 Arctostaphylos canescens 19358934
    -#> 4         Berberis nervosa 19374077
    -#> 5        Arbutus menziesii 19358819
    -
    -

Let's do something a bit more complicated: get common names for each taxon in a new column, and if there is more than one, concatenate them into a single character string for easy inclusion in a data.frame.

    -
    sci2comm_concat <- function(x){
    -  temp <- sci2comm(x, db = "eol")
    -  if(length(temp) == 0) NA else paste0(temp[[1]], collapse = ", ")
    -}
    -
    -dat_new <- tbl_df(dat)[1:5,] %>%
    -  rowwise() %>%
    -  mutate(comm = sci2comm_concat(species))
    -
    -

    To see the new column, do

    -
    dat_new %>% select(comm)
    -#> Source: local data frame [5 x 1]
    -#> Groups: <by row>
    -#> 
    -#>                                                                          comm
    -#> 1                      Mahala-mat Ceanothus, prostrate ceanothus, squawcarpet
    -#> 2 Prächtige Tanne, Goldtanne (Gold-Tanne), Kalifornische Rot-Tanne, Pracht-Ta
    -#> 3                          hoary manzanita, hoary manzanita, Sonoma manzanita
    -#> 4 Longleaf Oregon-grape, Cascade barberry, Dull Oregon grape, Oregon grape-ho
    -#> 5                   pacific madrone, Madrona, madrone, Kalifornianmansikkapuu
    -
    + Recently noticed a little Python library called [httpcode](https://github.com/rspivak/httpcode) that does a simple thing: gives information on http codes in the CLI. I thought this could maybe potentially be useful for R. So I made an R version. + +## Installation + + +```r +devtools::install_github("sckott/httpcode") +``` + + +```r +library("httpcode") +``` + +## Search by http code + + +```r +http_code(100) +#> +#> Message: Continue +#> Explanation: Request received, please continue +``` + + +```r +http_code(400) +#> +#> Message: Bad Request +#> Explanation: Bad request syntax or unsupported method +``` + + +```r +http_code(503) +#> +#> Message: Service Unavailable +#> Explanation: The server cannot process the request due to a high load +``` + + +```r +http_code(999) +#> Error: No description found for code: 999 +``` + +## Fuzzy code search + + +```r +http_code('1xx') +#> [[1]] +#> +#> Message: Continue +#> Explanation: Request received, please continue +#> +#> [[2]] +#> +#> Message: Switching Protocols +#> Explanation: Switching to new protocol; obey Upgrade header +#> +#> [[3]] +#> +#> Message: Processing +#> Explanation: WebDAV; RFC 2518 +``` + + +```r +http_code('3xx') +#> [[1]] +#> +#> Message: Multiple Choices +#> Explanation: Object has several resources -- see URI list +#> +#> [[2]] +#> +#> Message: Moved Permanently +#> Explanation: Object moved permanently -- see URI list +#> +#> [[3]] +#> +#> Message: Found +#> Explanation: Object moved temporarily -- see URI list +#> +#> [[4]] +#> +#> Message: See Other +#> Explanation: Object moved -- see Method and URL list +#> +#> [[5]] +#> +#> Message: Not Modified +#> Explanation: Document has not changed since given time +#> +#> [[6]] +#> +#> Message: Use Proxy +#> Explanation: You must use proxy specified in Location to access this resource. 
+#> +#> [[7]] +#> +#> Message: Switch Proxy +#> Explanation: Subsequent requests should use the specified proxy +#> +#> [[8]] +#> +#> Message: Temporary Redirect +#> Explanation: Object moved temporarily -- see URI list +#> +#> [[9]] +#> +#> Message: Permanent Redirect +#> Explanation: Object moved permanently +``` + + +```r +http_code('30[12]') +#> [[1]] +#> +#> Message: Moved Permanently +#> Explanation: Object moved permanently -- see URI list +#> +#> [[2]] +#> +#> Message: Found +#> Explanation: Object moved temporarily -- see URI list +``` + + +```r +http_code('30[34]') +#> [[1]] +#> +#> Message: See Other +#> Explanation: Object moved -- see Method and URL list +#> +#> [[2]] +#> +#> Message: Not Modified +#> Explanation: Document has not changed since given time +``` + +## Search by text message + + +```r +http_search("request") +#> [[1]] +#> +#> Message: Continue +#> Explanation: Request received, please continue +#> +#> [[2]] +#> +#> Message: OK +#> Explanation: Request fulfilled, document follows +#> +#> [[3]] +#> +#> Message: Accepted +#> Explanation: Request accepted, processing continues off-line +#> +#> [[4]] +#> +#> Message: Non-Authoritative Information +#> Explanation: Request fulfilled from cache +#> +#> [[5]] +#> +#> Message: No Content +#> Explanation: Request fulfilled, nothing follows +#> +#> [[6]] +#> +#> Message: Switch Proxy +#> Explanation: Subsequent requests should use the specified proxy +#> +#> [[7]] +#> +#> Message: Bad Request +#> Explanation: Bad request syntax or unsupported method +#> +#> [[8]] +#> +#> Message: Forbidden +#> Explanation: Request forbidden -- authorization will not help +#> +#> [[9]] +#> +#> Message: Request Timeout +#> Explanation: Request timed out; try again later. +#> +#> [[10]] +#> +#> Message: Conflict +#> Explanation: Request conflict. +#> +#> [[11]] +#> +#> Message: Request Entity Too Large +#> Explanation: Entity is too large. +#> +#> [[12]] +#> +#> Message: Request-URI Too Long +#> Explanation: URI is too long. +#> +#> [[13]] +#> +#> Message: Requested Range Not Satisfiable +#> Explanation: Cannot satisfy request range. +#> +#> [[14]] +#> +#> Message: Service Unavailable +#> Explanation: The server cannot process the request due to a high load +#> +#> [[15]] +#> +#> Message: HTTP Version Not Supported +#> Explanation: Cannot fulfill request. +``` + + +```r +http_search("forbidden") +#> [[1]] +#> +#> Message: Forbidden +#> Explanation: Request forbidden -- authorization will not help +``` + + +```r +http_search("too") +#> [[1]] +#> +#> Message: Request Entity Too Large +#> Explanation: Entity is too large. +#> +#> [[2]] +#> +#> Message: Request-URI Too Long +#> Explanation: URI is too long. +``` + + +```r +http_search("birds") +#> Error: No status code found for search: : birds +``` +

    - - 1000 commits to taxize + + taxize workflows

    - + + + A missed chat on the rOpenSci website the other day asked: + +> Hi there, i am trying to use the taxize package and have a .csv file of species names to run through taxize updating them. What would be the code i would need to run to achieve this? + +One way to answer this is to talk about the basic approach to importing data, doing stuff to the data, then recombining data. There are many ways to do this, but I'll go over a few of them. + +## Install taxize + + +```r +install.packages("taxize") +install.packages("downloader") +``` + + +```r +library("taxize") +``` + +## Import data + +We'll use Winston Chang's new `downloader` package to avoid problems with `https`, and get a dataset from our ropensci datasets repo [https://github.com/ropensci/datasets](https://github.com/ropensci/datasets) + + +```r +downloader::download("https://raw.githubusercontent.com/ropensci/datasets/master/planttraits/morphological.csv", "morphological.csv") +dat <- read.csv("morphological.csv", stringsAsFactors = FALSE) +head(dat) +#> species log_SLA leaf_water_content log_wood_density +#> 1 Abies concolor 3.46 0.51 -0.52 +#> 2 Abies grandis 3.58 0.49 -0.51 +#> 3 Abies magnifica 3.87 0.62 -0.53 +#> 4 Acacia farnesiana NA NA NA +#> 5 Acer glabrum 5.07 0.69 -0.54 +#> 6 Adenostoma fasciculata 3.56 0.46 -0.31 +#> log_ht log_N +#> 1 7.72 0.02 +#> 2 7.51 -0.31 +#> 3 7.58 -0.14 +#> 4 5.70 NA +#> 5 3.25 1.02 +#> 6 5.33 0.29 +``` + +After importing data, there are a variety of approaches you could take: + +1. Vector: Take species names as vector from your `data.frame`, cleaning them, then re-attching to the `data.frame` later, or +2. In-Place: Use for loops or `lapply` family functions to iterate over each name while simultaneously re-inserting into the `data.frame` + +## 1. Vector + +Make a vector of names + + +```r +splist <- dat$species +``` + +Then proceed to do name cleaning, e.g, we can use the `tnrs` function to see if any names are potentially not spelled correctly. + + +```r +tnrs_out <- tnrs(splist, source = "iPlant_TNRS") +head(tnrs_out) +#> submittedname acceptedname sourceid score +#> 1 Ceanothus prostratus Ceanothus prostratus iPlant_TNRS 1 +#> 2 Abies magnifica Abies magnifica iPlant_TNRS 1 +#> 3 Arctostaphylos canescens Arctostaphylos canescens iPlant_TNRS 1 +#> 4 Berberis nervosa Berberis nervosa iPlant_TNRS 1 +#> 5 Arbutus menziesii Arbutus menziesii iPlant_TNRS 1 +#> 6 Calocedrus decurrens Calocedrus decurrens iPlant_TNRS 1 +#> matchedname authority +#> 1 Ceanothus prostratus Benth. +#> 2 Abies magnifica A. Murray bis +#> 3 Arctostaphylos canescens Eastw. +#> 4 Berberis nervosa Pursh +#> 5 Arbutus menziesii Pursh +#> 6 Calocedrus decurrens (Torr.) 
Florin +#> uri +#> 1 http://www.tropicos.org/Name/27500276 +#> 2 http://www.tropicos.org/Name/24900142 +#> 3 http://www.tropicos.org/Name/12302547 +#> 4 http://www.tropicos.org/Name/3500175 +#> 5 http://www.tropicos.org/Name/12302436 +#> 6 http://www.tropicos.org/Name/9400069 +``` + +Those with score of less than 1 may have misspellings + + +```r +tnrs_out[ tnrs_out$score < 1, ] +#> submittedname acceptedname sourceid score +#> 23 Adenostoma fasciculata Adenostoma fasciculatum iPlant_TNRS 0.97 +#> 24 Arctostaphylos glandulosus Arctostaphylos glandulosa iPlant_TNRS 0.97 +#> 36 Chamaebatia foliosa Chamaebatia foliolosa iPlant_TNRS 0.95 +#> 38 Juniperus californicus Juniperus californica iPlant_TNRS 0.97 +#> 77 Prunus illicifolia Prunus ilicifolia iPlant_TNRS 0.99 +#> 78 Prunus subcordatus Prunus subcordata iPlant_TNRS 0.97 +#> matchedname authority +#> 23 Adenostoma fasciculatum Hook. & Arn. +#> 24 Arctostaphylos glandulosa Eastw. +#> 36 Chamaebatia foliolosa Benth. +#> 38 Juniperus californica Carrière +#> 77 Prunus ilicifolia (Nutt. ex Hook. & Arn.) D. Dietr. +#> 78 Prunus subcordata Benth. +#> uri +#> 23 http://www.tropicos.org/Name/27801458 +#> 24 http://www.tropicos.org/Name/12300542 +#> 36 http://www.tropicos.org/Name/27801486 +#> 38 http://www.tropicos.org/Name/9400374 +#> 77 http://www.tropicos.org/Name/27801102 +#> 78 http://www.tropicos.org/Name/27801124 +``` + +So let's take the `acceptedname` column as a the new names and assign to a new vector + + +```r +cleaned_names <- tnrs_out$acceptedname +``` + +Then join names back, replacing them, or adding as a new column + +Replace + + +```r +dat$species <- cleaned_names +head(dat) +#> species log_SLA leaf_water_content log_wood_density +#> 1 Ceanothus prostratus 3.46 0.51 -0.52 +#> 2 Abies magnifica 3.58 0.49 -0.51 +#> 3 Arctostaphylos canescens 3.87 0.62 -0.53 +#> 4 Berberis nervosa NA NA NA +#> 5 Arbutus menziesii 5.07 0.69 -0.54 +#> 6 Calocedrus decurrens 3.56 0.46 -0.31 +#> log_ht log_N +#> 1 7.72 0.02 +#> 2 7.51 -0.31 +#> 3 7.58 -0.14 +#> 4 5.70 NA +#> 5 3.25 1.02 +#> 6 5.33 0.29 +``` + +New column + + +```r +dat$species_cleaned <- cleaned_names +head(dat) +#> species log_SLA leaf_water_content log_wood_density +#> 1 Ceanothus prostratus 3.46 0.51 -0.52 +#> 2 Abies magnifica 3.58 0.49 -0.51 +#> 3 Arctostaphylos canescens 3.87 0.62 -0.53 +#> 4 Berberis nervosa NA NA NA +#> 5 Arbutus menziesii 5.07 0.69 -0.54 +#> 6 Calocedrus decurrens 3.56 0.46 -0.31 +#> log_ht log_N species_cleaned +#> 1 7.72 0.02 Ceanothus prostratus +#> 2 7.51 -0.31 Abies magnifica +#> 3 7.58 -0.14 Arctostaphylos canescens +#> 4 5.70 NA Berberis nervosa +#> 5 3.25 1.02 Arbutus menziesii +#> 6 5.33 0.29 Calocedrus decurrens +``` + +## 2. In-place + +You can use functions from the `dplyr` package to `split-apply-combine`, where `split` is split apart your vector for each taxon, `apply` to apply a function or functions to do name cleaning, then `combine` to put them back together. + +Here, we'll attach taxonomic ids from the Catalogue of Life to each species (each row) (with just a subset of the data to save time): + + +```r +library("dplyr") +tbl_df(dat)[1:5,] %>% + rowwise() %>% + mutate(colid = get_colid(species)) %>% + select(species, colid) +#> Source: local data frame [5 x 2] +#> Groups: +#> +#> species colid +#> 1 Ceanothus prostratus 19544732 +#> 2 Abies magnifica 18158318 +#> 3 Arctostaphylos canescens 19358934 +#> 4 Berberis nervosa 19374077 +#> 5 Arbutus menziesii 19358819 +``` + +Let's do something a bit more complicated. 
Get common names for each taxon in a new column, if more than 1, concatenate into a single character string for easy inclusion in a `data.frame` + + +```r +sci2comm_concat <- function(x){ + temp <- sci2comm(x, db = "eol") + if(length(temp) == 0) NA else paste0(temp[[1]], collapse = ", ") +} + +dat_new <- tbl_df(dat)[1:5,] %>% + rowwise() %>% + mutate(comm = sci2comm_concat(species)) +``` + +To see the new column, do + + +```r +dat_new %>% select(comm) +#> Source: local data frame [5 x 1] +#> Groups: +#> +#> comm +#> 1 Mahala-mat Ceanothus, prostrate ceanothus, squawcarpet +#> 2 Prächtige Tanne, Goldtanne (Gold-Tanne), Kalifornische Rot-Tanne, Pracht-Ta +#> 3 hoary manzanita, hoary manzanita, Sonoma manzanita +#> 4 Longleaf Oregon-grape, Cascade barberry, Dull Oregon grape, Oregon grape-ho +#> 5 pacific madrone, Madrona, madrone, Kalifornianmansikkapuu +``` -

Just today we've hit 1000 commits on taxize! taxize is an R client to search across lots of taxonomic databases on the web. In honor of the 1000 commit milestone, here are some stats on the project.

    - -

Before that, though: lots of people have contributed to taxize; it's a big group effort:

    - - - -

    In addition, we've had lots of feedback from users, including feature requests and bug reports, making taxize a lot better.

    - -

    Setup

    -
    library("devtools")
    -library("httr")
    -library("ggplot2")
    -library("stringr")
    -library("plyr")
    -library("dplyr")
    -
    -

    Define functions

    -
    github_auth <- function(appname = getOption("gh_appname"), key = getOption("gh_id"),
    -                        secret = getOption("gh_secret")) {
    -  if (is.null(getOption("gh_token"))) {
    -    myapp <- oauth_app(appname, key, secret)
    -    token <- oauth2.0_token(oauth_endpoints("github"), myapp)
    -    options(gh_token = token)
    -  } else {
    -    token <- getOption("gh_token")
    -  }
    -  return(token)
    -}
    -
    -make_url <- function(x, y, z) {
    -  sprintf("https://api.github.com/repos/%s/%s/%s", x, y, z)
    -}
    -
    -process_result <- function(x) {
    -  stop_for_status(x)
    -  if (!x$headers$`content-type` == "application/json; charset=utf-8")
    -    stop("content type mismatch")
    -  tmp <- content(x, as = "text")
    -  jsonlite::fromJSON(tmp, flatten = TRUE)
    -}
    -
    -gh_commits <- function(repo, owner = "ropensci", ...) {
    -  token <- github_auth()
    -  outout <- list(); iter <- 0; nexturl <- "dontstop"
    -  while(nexturl != "stop"){
    -    iter <- iter + 1
    -    req <- if(grepl("https:/", nexturl)) GET(nexturl, config = c(token = token)) else GET(make_url(owner, repo, "commits"), query = list(per_page=100), config = c(token = token))
    -    outout[[iter]] <- process_result(req)
    -    link <- req$headers$link
    -    nexturl <- if(is.null(link)){ "stop" } else {
    -      if(grepl("next", link)){
    -        stringr::str_extract(link, "https://[0-9A-Za-z/?=\\._&]+")
    -      } else {
    -        "stop"
    -      }
    -    }
    -  }
    -  outout <- outout[sapply(outout, function(x) !identical(x, list()))]
    -  dplyr::rbind_all(outout)
    -}
    -
    -gh_issues <- function(repo, owner = "ropensci", ...) {
    -  token <- github_auth()
    -  outout <- list(); iter <- 0; nexturl <- "dontstop"
    -  while(nexturl != "stop"){
    -    iter <- iter + 1
    -    req <- if(grepl("https:/", nexturl)) GET(nexturl, query=list(state="all"), config = c(token = token)) else GET(make_url(owner, repo, "issues"), query = list(per_page=100, state="all"), config = c(token = token))
    -    outout[[iter]] <- process_result(req)
    -    link <- req$headers$link
    -    nexturl <- if(is.null(link)){ "stop" } else {
    -      if(grepl("next", link)){
    -        stringr::str_extract(link, "https://[0-9A-Za-z/?=\\._&]+")
    -      } else {
    -        "stop"
    -      }
    -    }
    -  }
    -  outout <- outout[sapply(outout, function(x) !identical(x, list()))]
    -  dplyr::rbind_all(outout)
    -}
    -
    -gh_commit <- function(sha, repo, owner = "ropensci", ...) {
    -  token <- github_auth()
    -  req <- GET(paste0(make_url(owner, repo, "commits"), "/", sha),
    -             config = c(token = token, ...))
    -  process_result(req)
    -}
    -
    -gh_verb <- function(owner = "ropensci", repo, verb, args=list(), ...) {
    -  token <- github_auth()
    -  req <- GET(make_url(owner, repo, verb), query=args, config = c(token = token, ...))
    -  process_result(req)
    -}
    -
    -

    Commits

    - -

    List of commits

    -
    out <- gh_commits("taxize")
    -
    -

    Get changes for each commit

    -
    changes <- vapply(out$sha, function(x) gh_commit(x, repo="taxize")$stats$total, numeric(1))
    -changesdf <-  data.frame(changes=unname(changes), sha=names(changes))
    -
    -

    Combine

    -
    out <- inner_join(out, changesdf)
    -
    -

    Total changes through time (additions + deletions)

    -
    ct <- function(x) as.POSIXct(x, format="%Y-%m-%dT%H:%M:%SZ", tz="UTC")
    -out %>%
    -  mutate(commit.committer.date = ct(commit.committer.date)) %>%
    -  ggplot(aes(x=commit.committer.date, y=changes)) +
    -    geom_area(fill="#87D2A0") +
    -    theme_grey(base_size = 20)
    -
    -

    - -

    By Authors

    -
    out %>%
    -  group_by(author.login) %>%
    -  summarise(n = n()) %>%
    -  ggplot(aes(author.login, n)) +
    -    geom_bar(stat = "identity", fill="#87D2A0") +
    -    coord_flip() +
    -    theme_grey(base_size = 20)
    -
    -

    - -

    Issues

    -
    out <- gh_issues("taxize")
    -
    -

    Number of issues

    -
    NROW(out)
    -#> [1] 382
    -
    -

    Number of open issues

    -
    out %>%
    -  filter(state == "open") %>%
    -  NROW
    -#> [1] 35
    -
    -

    Number of pull requests

    -
    out %>%
    -  filter(!is.na(pull_request.url)) %>%
    -  NROW
    -#> [1] 119
    -
    -

    Forks, number of

    -
    NROW(gh_verb(repo = "taxize", verb="forks"))
    -#> [1] 16
    -
    -

    Stars, number of

    -
    NROW(gh_verb(repo = "taxize", verb="stargazers", args=list(per_page=100)))
    -#> [1] 44
    -
    -

    Watchers, number of

    -
    NROW(gh_verb(repo = "taxize", verb="subscribers", args=list(per_page=100)))
    -#> [1] 12
    -

    - - Intro to alpha ckanr - R client for CKAN RESTful API + + 1000 commits to taxize

    - - -

Recently I needed to create a client for scraping museum metadata to help out some folks that use that kind of data. It's called musemeta. One of the data sources in that package uses the open source data portal software CKAN, so we can interact with the CKAN API to get data. Since many groups can use the CKAN API and infrastructure because it's open source, I thought: why not have a general purpose R client for this, since there are other clients for Python, PHP, Ruby, etc.?

    - -

    Here's a bit of an intro:

    - -

    Setup

    - -

    Get/load packages

    -
    install.packages("devtools")
    -devtools::install_github("ropensci/ckanr")
    -
    library("ckanr")
    -
    -
    -

Note: the default URL is for http://data.techno-science.ca/. You can change that in the url parameter.
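For example, assuming you have another CKAN instance you want to hit (the URL below is just a placeholder, not a real endpoint I've tested), you could pass it to any of the functions via url:

```r
# same call as in the Changes section below, pointed at a different CKAN instance
# (the URL here is a placeholder -- substitute your own portal)
changes(limit = 2, as = "table", url = "http://my-ckan-instance.org")
```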

    -
    - -

    Some package details

    - -

All API calls are POST requests, and are handled through a helper function called ckan_POST(). See ckanr:::ckan_POST to see the function.
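If you're curious what such a helper boils down to, here's a rough sketch of the idea. This is not ckanr's actual ckan_POST(), just an illustration; it assumes the standard CKAN action API path api/3/action and a JSON response with a result field.

```r
library("httr")
library("jsonlite")

# illustrative only -- see ckanr:::ckan_POST for the real thing
ckan_post_sketch <- function(action, body = list(),
                             url = "http://data.techno-science.ca", ...) {
  # hit a CKAN action endpoint, send the body, parse the JSON "result"
  res <- POST(paste(url, "api/3/action", action, sep = "/"),
              body = body, ...)
  stop_for_status(res)
  fromJSON(content(res, as = "text"))$result
}
```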

    - -

There are some common parameters across most functions that are worth describing a bit (a quick paging example follows the list):

    - -
      -
• offset (numeric) Where to start getting items from (optional, default: 0)
• limit (numeric) The maximum number of items to return (optional, default: 31)
• url Base url to use. Default: http://data.techno-science.ca
• as (character) One of list (default), table, or json. Parsing with table option uses jsonlite::fromJSON(..., simplifyDataFrame = TRUE), which attempts to parse data to data.frame's when possible, so the result can vary.
• ... Curl args passed on to httr::POST
    - -
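As a quick illustration of offset and limit together, which is how you'd page through results, here's a sketch using the changes() call from the next section (assuming offset simply skips that many items):

```r
# first "page" of two items, then the next two
page1 <- changes(limit = 2, offset = 0, as = "table")
page2 <- changes(limit = 2, offset = 2, as = "table")
```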

    Changes

    -
    changes(limit = 2, as = "table")
    -#>                                user_id                  timestamp
    -#> 1 b50449ea-1dcc-4d52-b620-fc95bf56034b 2014-11-06T18:58:08.001743
    -#> 2 b50449ea-1dcc-4d52-b620-fc95bf56034b 2014-11-06T18:55:55.059527
    -#>                              object_id
    -#> 1 cc6a523c-cecf-4a95-836b-295a11ce2bce
    -#> 2 cc6a523c-cecf-4a95-836b-295a11ce2bce
    -#>                            revision_id data.package.maintainer
    -#> 1 5d11079e-fc05-4121-9fd5-fe086f5e5f33                        
    -#> 2 4a591538-0584-487b-8ed1-3260d1d09d77                        
    -#>   data.package.name data.package.metadata_modified data.package.author
    -#> 1              test     2014-11-06T18:55:54.772675                    
    -#> 2              test     2014-11-06T18:55:54.772675                    
    -#>   data.package.url data.package.notes               data.package.owner_org
    -#> 1                                     fafa260d-e2bf-46cd-9c35-34c1dfa46c57
    -#> 2                                     fafa260d-e2bf-46cd-9c35-34c1dfa46c57
    -#>   data.package.private data.package.maintainer_email
    -#> 1                FALSE                              
    -#> 2                FALSE                              
    -#>   data.package.author_email data.package.state data.package.version
    -#> 1                                      deleted                     
    -#> 2                                       active                     
    -#>           data.package.creator_user_id
    -#> 1 b50449ea-1dcc-4d52-b620-fc95bf56034b
    -#> 2 b50449ea-1dcc-4d52-b620-fc95bf56034b
    -#>                        data.package.id data.package.title
    -#> 1 cc6a523c-cecf-4a95-836b-295a11ce2bce               test
    -#> 2 cc6a523c-cecf-4a95-836b-295a11ce2bce               test
    -#>               data.package.revision_id data.package.type
    -#> 1 5d11079e-fc05-4121-9fd5-fe086f5e5f33           dataset
    -#> 2 4a591538-0584-487b-8ed1-3260d1d09d77           dataset
    -#>   data.package.license_id                                   id
    -#> 1            notspecified 59c308c8-68b2-4b92-bc57-129378d31882
    -#> 2            notspecified a8577e2c-f742-49c2-bef3-ca3299e58704
    -#>     activity_type
    -#> 1 deleted package
    -#> 2 changed package
    -
    -

    List datasets

    -
    datasets(as = "table")
    -#>  [1] "artifact-data-agriculture"                                  
    -#>  [2] "artifact-data-aviation"                                     
    -#>  [3] "artifact-data-bookbinding"                                  
    -#>  [4] "artifact-data-chemistry"                                    
    -#>  [5] "artifact-data-communications"                               
    -#>  [6] "artifact-data-computing-technology"                         
    -#>  [7] "artifact-data-domestic-technology"                          
    -#>  [8] "artifact-data-energy-electric"                              
    -#>  [9] "artifact-data-exploration-and-survey"                       
    -#> [10] "artifact-data-fisheries"                                    
    -#> [11] "artifact-data-forestry"                                     
    -#> [12] "artifact-data-horology"                                     
    -#> [13] "artifact-data-industrial-technology"                        
    -#> [14] "artifact-data-lighting-technology"                          
    -#> [15] "artifact-data-location-canada-agriculture-and-food-museum"  
    -#> [16] "artifact-data-location-canada-aviation-and-space-museum"    
    -#> [17] "artifact-data-location-canada-science-and-technology-museum"
    -#> [18] "artifact-data-marine-transportation"                        
    -#> [19] "artifact-data-mathematics"                                  
    -#> [20] "artifact-data-medical-technology"                           
    -#> [21] "artifact-data-meteorology"                                  
    -#> [22] "artifact-data-metrology"                                    
    -#> [23] "artifact-data-mining-and-metallurgy"                        
    -#> [24] "artifact-data-motorized-ground-transportation"              
    -#> [25] "artifact-data-non-motorized-ground-transportation"          
    -#> [26] "artifact-data-on-loan"                                      
    -#> [27] "artifact-data-photography"                                  
    -#> [28] "artifact-data-physics"                                      
    -#> [29] "artifact-data-printing"                                     
    -#> [30] "artifact-data-railway-transportation"                       
    -#> [31] "artifact-dataset-fire-fighting"
    -
    -

    List tags

    -
    tag_list('aviation', as='table')
    -#>   vocabulary_id                     display_name
    -#> 1            NA                         Aviation
    -#> 2            NA Canada Aviation and Space Museum
    -#>                                     id                             name
    -#> 1 cc1db2db-b08b-4888-897f-a17eade2461b                         Aviation
    -#> 2 8d05a650-bc7b-4b89-bcc8-c10177e60119 Canada Aviation and Space Museum
    -
    -

    Show tags

    - -

    Subset for readme brevity

    -
    tag_show('Aviation')$packages[[1]][1:3]  
    -#> $owner_org
    -#> [1] "fafa260d-e2bf-46cd-9c35-34c1dfa46c57"
    -#> 
    -#> $maintainer
    -#> [1] ""
    -#> 
    -#> $relationships_as_object
    -#> list()
    -
    -

    List groups

    -
    group_list(as='table')
    -#>                         display_name description
    -#> 1                     Communications            
    -#> 2 Domestic and Industrial Technology            
    -#> 3                         Everything            
    -#> 4                           Location            
    -#> 5                          Resources            
    -#> 6         Scientific Instrumentation            
    -#> 7                     Transportation            
    -#>                                title
    -#> 1                     Communications
    -#> 2 Domestic and Industrial Technology
    -#> 3                         Everything
    -#> 4                           Location
    -#> 5                          Resources
    -#> 6         Scientific Instrumentation
    -#> 7                     Transportation
    -#>                                                                   image_display_url
    -#> 1       http://data.techno-science.ca/uploads/group/20141024-162305.6896412comm.jpg
    -#> 2    http://data.techno-science.ca/uploads/group/20141024-162324.3636615domtech.jpg
    -#> 3 http://data.techno-science.ca/uploads/group/20141024-162448.0656596everything.jpg
    -#> 4   http://data.techno-science.ca/uploads/group/20141024-162528.8786547location.jpg
    -#> 5     http://data.techno-science.ca/uploads/group/20141024-162608.3732604resour.jpg
    -#> 6    http://data.techno-science.ca/uploads/group/20141024-162549.1925831sciinst.jpg
    -#> 7  http://data.techno-science.ca/uploads/group/20141024-162624.1872823transport.jpg
    -#>   approval_status is_organization  state
    -#> 1        approved           FALSE active
    -#> 2        approved           FALSE active
    -#> 3        approved           FALSE active
    -#> 4        approved           FALSE active
    -#> 5        approved           FALSE active
    -#> 6        approved           FALSE active
    -#> 7        approved           FALSE active
    -#>                               image_url
    -#> 1       20141024-162305.6896412comm.jpg
    -#> 2    20141024-162324.3636615domtech.jpg
    -#> 3 20141024-162448.0656596everything.jpg
    -#> 4   20141024-162528.8786547location.jpg
    -#> 5     20141024-162608.3732604resour.jpg
    -#> 6    20141024-162549.1925831sciinst.jpg
    -#> 7  20141024-162624.1872823transport.jpg
    -#>                            revision_id packages  type
    -#> 1 cc302424-2e68-4fcc-9a3a-6de60748c2e4        5 group
    -#> 2 b7d95b87-5999-45f9-8775-c64094842551        2 group
    -#> 3 c2f0c59a-a543-4d67-a61f-4f387068ba53        1 group
    -#> 4 6816d571-d2bd-4131-b99d-80e7e6797492        4 group
    -#> 5 e37ee30d-577b-4349-8f0e-eaa4543497e8        6 group
    -#> 6 74eba42e-08b3-4400-b40f-3d6159ae6e9d       10 group
    -#> 7 a6cc4aab-eae9-42ba-9ab4-cbf45d5c6a0e        7 group
    -#>                                     id                               name
    -#> 1 5268ce18-e3b8-4802-b29e-30740b46e52d                     communications
    -#> 2 5a9a8095-9e0c-485e-84f6-77f577607991 domestic-and-industrial-technology
    -#> 3 d7dd233e-a1cc-43da-8152-f7ed15d26756                         everything
    -#> 4 770fc9c0-d4f3-48b0-a4ee-e00c6882df1d                           location
    -#> 5 f6c205de-cc95-4308-ac9f-5a63f1a5c7ee                          resources
    -#> 6 b98ff457-2031-48b6-b681-9adb3afc501b         scientific-instrumentation
    -#> 7 a73bf7be-310d-472e-83e1-43a3d87602ba                     transportation
    -
    -

    Show groups

    - -

    Subset for readme brevity

    -
    group_show('communications', as='table')$users
    -#>   openid about capacity     name                    created
    -#> 1     NA  <NA>    admin     marc 2014-10-24T14:44:29.885262
    -#> 2     NA          admin sepandar 2014-10-23T19:40:42.056418
    -#>                         email_hash sysadmin
    -#> 1 a32002c960476614370a16e9fb81f436    FALSE
    -#> 2 10b930a228afd1da2647d62e70b71bf8     TRUE
    -#>   activity_streams_email_notifications  state number_of_edits
    -#> 1                                FALSE active             376
    -#> 2                                FALSE active              44
    -#>   number_administered_packages display_name fullname
    -#> 1                           39         marc     <NA>
    -#> 2                            1     sepandar         
    -#>                                     id
    -#> 1 27778230-2e90-4818-9f00-bbf778c8fa09
    -#> 2 b50449ea-1dcc-4d52-b620-fc95bf56034b
    -
    -

    Show a package

    -
    package_show('34d60b13-1fd5-430e-b0ec-c8bc7f4841cf', as='table')$resources
    -#>                      resource_group_id cache_last_updated
    -#> 1 ea8533d9-cdc6-4e0e-97b9-894e06d50b92                 NA
    -#> 2 ea8533d9-cdc6-4e0e-97b9-894e06d50b92                 NA
    -#> 3 ea8533d9-cdc6-4e0e-97b9-894e06d50b92                 NA
    -#> 4 ea8533d9-cdc6-4e0e-97b9-894e06d50b92                 NA
    -#>           revision_timestamp webstore_last_updated
    -#> 1 2014-10-28T18:13:22.213530                    NA
    -#> 2 2014-11-04T02:59:50.567068                    NA
    -#> 3 2014-11-05T21:23:58.533397                    NA
    -#> 4 2014-11-05T21:25:16.848423                    NA
    -#>                                     id size  state hash
    -#> 1 be2b0af8-24a8-4a55-8b30-89f5459b713a   NA active     
    -#> 2 7d65910e-4bdc-4f06-a213-e24e36762767   NA active     
    -#> 3 97622ad7-1507-4f6a-8acb-14e826447389   NA active     
    -#> 4 7a72498a-c49c-4e84-8b10-58991de10df6   NA active     
    -#>                                    description format
    -#> 1                                  XML Dataset    XML
    -#> 2 Data dictionary for CSTMC artifact datasets.    XLS
    -#> 3       Tips for using the artifacts datasets.   .php
    -#> 4       Tips for using the artifacts datasets.   .php
    -#>   tracking_summary.total tracking_summary.recent mimetype_inner url_type
    -#> 1                      0                       0             NA       NA
    -#> 2                      0                       0             NA       NA
    -#> 3                      0                       0             NA       NA
    -#> 4                      0                       0             NA       NA
    -#>   mimetype cache_url                               name
    -#> 1       NA        NA Artifact Data - Vacuum Tubes (XML)
    -#> 2       NA        NA                    Data Dictionary
    -#> 3       NA        NA                     Tips (English)
    -#> 4       NA        NA                      Tips (French)
    -#>                      created
    -#> 1 2014-10-28T18:13:22.240393
    -#> 2 2014-11-04T02:59:50.643658
    -#> 3 2014-11-04T18:14:23.952937
    -#> 4 2014-11-05T21:25:16.887796
    -#>                                                                                                                                                    url
    -#> 1                         http://source.techno-science.ca/datasets-donn%C3%A9es/artifacts-artefacts/groups-groupes/vacuum-tubes-tubes-electronique.xml
    -#> 2 http://source.techno-science.ca/datasets-donn%C3%A9es/artifacts-artefacts/cstmc-artifact-data-dictionary-dictionnaire-de-donnees-artefacts-smstc.xls
    -#> 3                                                                          http://techno-science.ca/en/open-data/tips-using-artifact-open-data-set.php
    -#> 4                                                                 http://techno-science.ca/fr/donnees-ouvertes/conseils-donnees-ouvertes-artefacts.php
    -#>   webstore_url last_modified position                          revision_id
    -#> 1           NA            NA        0 9a27d884-f181-4842-ab47-cda35a8bf99a
    -#> 2           NA            NA        1 5d27b3e6-7870-4c12-a122-9e9f5adee4a0
    -#> 3           NA            NA        2 40993f16-402b-439c-9288-2f2b177e4b8f
    -#> 4           NA            NA        3 57f1488e-a140-4eb6-9329-fc13202a73af
    -#>   resource_type
    -#> 1            NA
    -#> 2            NA
    -#> 3            NA
    -#> 4            NA
    -
    -

    Search for packages

    -
    out <- package_search(q = '*:*', rows = 2, as="table")$results
    -out[, !names(out) %in% 'resources']
    -#>                      license_title maintainer relationships_as_object
    -#> 1 Open Government Licence - Canada                               NULL
    -#> 2 Open Government Licence - Canada                               NULL
    -#>   private maintainer_email         revision_timestamp
    -#> 1   FALSE                  2014-11-05T23:17:46.220002
    -#> 2   FALSE                  2014-11-05T23:17:04.923594
    -#>                                     id           metadata_created
    -#> 1 35d5484d-38ce-495e-8722-7857c4fd17bf 2014-10-28T20:13:11.572558
    -#> 2 da65507d-b018-4d3b-bde3-5419cf29d144 2014-10-28T14:59:21.386177
    -#>            metadata_modified author author_email  state version
    -#> 1 2014-11-05T23:17:46.220657                     active        
    -#> 2 2014-11-05T23:17:04.924229                     active        
    -#>                        creator_user_id    type num_resources
    -#> 1 27778230-2e90-4818-9f00-bbf778c8fa09 dataset             4
    -#> 2 27778230-2e90-4818-9f00-bbf778c8fa09 dataset             4
    -#>                                                                                                                       tags
    -#> 1                         NA, Location, Location, 2014-10-28T20:13:11.572558, active, da88c5a2-3766-41ea-a75b-9c87047cc528
    -#> 2 NA, Computing Technology, Computing Technology, 2014-10-28T14:59:21.386177, active, 5371dc28-9ce8-4f21-9afb-1f155f132bfe
    -#>   tracking_summary.total tracking_summary.recent
    -#> 1                     35                      10
    -#> 2                     24                       8
    -#>                                                                                                                                                                                                       groups
    -#> 1                                                      Location, , http://data.techno-science.ca/uploads/group/20141024-162528.8786547location.jpg, Location, 770fc9c0-d4f3-48b0-a4ee-e00c6882df1d, location
    -#> 2 Scientific Instrumentation, , http://data.techno-science.ca/uploads/group/20141024-162549.1925831sciinst.jpg, Scientific Instrumentation, b98ff457-2031-48b6-b681-9adb3afc501b, scientific-instrumentation
    -#>   license_id relationships_as_subject num_tags organization.description
    -#> 1 ca-ogl-lgo                     NULL        1                         
    -#> 2 ca-ogl-lgo                     NULL        1                         
    -#>         organization.created organization.title organization.name
    -#> 1 2014-10-24T14:49:36.878579              CSTMC             cstmc
    -#> 2 2014-10-24T14:49:36.878579              CSTMC             cstmc
    -#>   organization.revision_timestamp organization.is_organization
    -#> 1      2014-10-24T14:49:36.813670                         TRUE
    -#> 2      2014-10-24T14:49:36.813670                         TRUE
    -#>   organization.state organization.image_url
    -#> 1             active                       
    -#> 2             active                       
    -#>               organization.revision_id organization.type
    -#> 1 7a325a56-46f1-419c-b7b2-ec7501edb35a      organization
    -#> 2 7a325a56-46f1-419c-b7b2-ec7501edb35a      organization
    -#>                        organization.id organization.approval_status
    -#> 1 fafa260d-e2bf-46cd-9c35-34c1dfa46c57                     approved
    -#> 2 fafa260d-e2bf-46cd-9c35-34c1dfa46c57                     approved
    -#>                                                          name isopen url
    -#> 1 artifact-data-location-canada-science-and-technology-museum  FALSE    
    -#> 2                          artifact-data-computing-technology  FALSE    
    -#>                                                                                                                                                                        notes
    -#> 1 This dataset includes artifacts in the collection of the Canada Science and Technology Museums Corporation that are currently in the Canada Science and Technology Museum.
    -#> 2                                This dataset includes artifacts in the collection of the Canada Science and Technology Museums Corporation related to computing technology.
    -#>                              owner_org extras
    -#> 1 fafa260d-e2bf-46cd-9c35-34c1dfa46c57   NULL
    -#> 2 fafa260d-e2bf-46cd-9c35-34c1dfa46c57   NULL
    -#>                                            license_url
    -#> 1 http://data.gc.ca/eng/open-government-licence-canada
    -#> 2 http://data.gc.ca/eng/open-government-licence-canada
    -#>                                                             title
    -#> 1 Artifact Data - Location - Canada Science and Technology Museum
    -#> 2                            Artifact Data - Computing Technology
    -#>                            revision_id
    -#> 1 694a977a-c238-47a4-8671-caddca4edfca
    -#> 2 858cb240-76a0-406a-800c-e4ae6cc56ab9
    -
    -

    Search for resources

    -
    resource_search(q = 'name:data', limit = 2, as='table')
    -#> $count
    -#> [1] 71
    -#> 
    -#> $results
    -#>                      resource_group_id cache_last_updated
    -#> 1 01a82e52-01bf-4a9c-9b45-c4f9b92529fa                 NA
    -#> 2 01a82e52-01bf-4a9c-9b45-c4f9b92529fa                 NA
    -#>   webstore_last_updated                                   id size  state
    -#> 1                    NA e179e910-27fb-44f4-a627-99822af49ffa   NA active
    -#> 2                    NA ba84e8b7-b388-4d2a-873a-7b107eb7f135   NA active
    -#>   last_modified hash                                  description format
    -#> 1            NA                                       XML Dataset    XML
    -#> 2            NA      Data dictionary for CSTMC artifact datasets.    XLS
    -#>   mimetype_inner url_type mimetype cache_url
    -#> 1             NA       NA       NA        NA
    -#> 2             NA       NA       NA        NA
    -#>                                           name                    created
    -#> 1 Artifact Data - Exploration and Survey (XML) 2014-10-28T15:50:35.374303
    -#> 2                              Data Dictionary 2014-11-03T18:01:02.094210
    -#>                                                                                                                                                    url
    -#> 1              http://source.techno-science.ca/datasets-donn%C3%A9es/artifacts-artefacts/groups-groupes/exploration-and-survey-exploration-et-leve.xml
    -#> 2 http://source.techno-science.ca/datasets-donn%C3%A9es/artifacts-artefacts/cstmc-artifact-data-dictionary-dictionnaire-de-donnees-artefacts-smstc.xls
    -#>   webstore_url position                          revision_id resource_type
    -#> 1           NA        0 a22e6741-3e89-4db0-a802-ba594b1c1fad            NA
    -#> 2           NA        1 da1f8585-521d-47ef-8ead-7832474a3421            NA
    -
    -

    Future work

    - -
      -
    • There's already an issue to add support for DataStore
    • -
    • This client needs to be tested against many other CKAN API instances to make sure it's robust
    • -
    • Add a test suite
    • -
    • Use cases: it would be nice to include use cases in the package documentation
    • -
    • Other things? Get in touch on twitter @recology_ or below
    • -
    + + + Just today we've hit 1000 commits on `taxize`! `taxize` is an R client to search across lots of taxonomic databases on the web. In honor of the 1000 commit milestone, here's some stats on the project. + +Before that, lots of people have contributed to `taxize`, it's a big group effort: + +* [Eduard Szöcs](https://github.com/EDiLD) +* [Zachary Foster](https://github.com/zachary-foster) +* [Carl Boettiger](https://github.com/cboettig) +* [Karthik Ram](https://github.com/karthik) +* [Jari Oksanen](https://github.com/jarioksa) +* [Francis Michonneau](https://github.com/fmichonneau) +* [Oliver Keyes](https://github.com/Ironholds) +* [David LeBauer](https://github.com/dlebauer) +* [Ben Marwick](https://github.com/benmarwick) +* [Anirvan Chatterjee](https://github.com/anirvan) + +In addition, we've had lots of feedback from users, including feature requests and bug reports, making `taxize` a lot better. + +## Setup + + +```r +library("devtools") +library("httr") +library("ggplot2") +library("stringr") +library("plyr") +library("dplyr") +``` + +## Define functions + + +```r +github_auth <- function(appname = getOption("gh_appname"), key = getOption("gh_id"), + secret = getOption("gh_secret")) { + if (is.null(getOption("gh_token"))) { + myapp <- oauth_app(appname, key, secret) + token <- oauth2.0_token(oauth_endpoints("github"), myapp) + options(gh_token = token) + } else { + token <- getOption("gh_token") + } + return(token) +} + +make_url <- function(x, y, z) { + sprintf("https://api.github.com/repos/%s/%s/%s", x, y, z) +} + +process_result <- function(x) { + stop_for_status(x) + if (!x$headers$`content-type` == "application/json; charset=utf-8") + stop("content type mismatch") + tmp <- content(x, as = "text") + jsonlite::fromJSON(tmp, flatten = TRUE) +} + +gh_commits <- function(repo, owner = "ropensci", ...) { + token <- github_auth() + outout <- list(); iter <- 0; nexturl <- "dontstop" + while(nexturl != "stop"){ + iter <- iter + 1 + req <- if(grepl("https:/", nexturl)) GET(nexturl, config = c(token = token)) else GET(make_url(owner, repo, "commits"), query = list(per_page=100), config = c(token = token)) + outout[[iter]] <- process_result(req) + link <- req$headers$link + nexturl <- if(is.null(link)){ "stop" } else { + if(grepl("next", link)){ + stringr::str_extract(link, "https://[0-9A-Za-z/?=\\._&]+") + } else { + "stop" + } + } + } + outout <- outout[sapply(outout, function(x) !identical(x, list()))] + dplyr::rbind_all(outout) +} + +gh_issues <- function(repo, owner = "ropensci", ...) { + token <- github_auth() + outout <- list(); iter <- 0; nexturl <- "dontstop" + while(nexturl != "stop"){ + iter <- iter + 1 + req <- if(grepl("https:/", nexturl)) GET(nexturl, query=list(state="all"), config = c(token = token)) else GET(make_url(owner, repo, "issues"), query = list(per_page=100, state="all"), config = c(token = token)) + outout[[iter]] <- process_result(req) + link <- req$headers$link + nexturl <- if(is.null(link)){ "stop" } else { + if(grepl("next", link)){ + stringr::str_extract(link, "https://[0-9A-Za-z/?=\\._&]+") + } else { + "stop" + } + } + } + outout <- outout[sapply(outout, function(x) !identical(x, list()))] + dplyr::rbind_all(outout) +} + +gh_commit <- function(sha, repo, owner = "ropensci", ...) { + token <- github_auth() + req <- GET(paste0(make_url(owner, repo, "commits"), "/", sha), + config = c(token = token, ...)) + process_result(req) +} + +gh_verb <- function(owner = "ropensci", repo, verb, args=list(), ...) 
{ + token <- github_auth() + req <- GET(make_url(owner, repo, verb), query=args, config = c(token = token, ...)) + process_result(req) +} +``` + +## Commits + +List of commits + + +```r +out <- gh_commits("taxize") +``` + +Get changes for each commit + + +```r +changes <- vapply(out$sha, function(x) gh_commit(x, repo="taxize")$stats$total, numeric(1)) +changesdf <- data.frame(changes=unname(changes), sha=names(changes)) +``` + +Combine + + +```r +out <- inner_join(out, changesdf) +``` + +Total changes through time (additions + deletions) + + +```r +ct <- function(x) as.POSIXct(x, format="%Y-%m-%dT%H:%M:%SZ", tz="UTC") +out %>% + mutate(commit.committer.date = ct(commit.committer.date)) %>% + ggplot(aes(x=commit.committer.date, y=changes)) + + geom_area(fill="#87D2A0") + + theme_grey(base_size = 20) +``` + +![](/public/img/2014-11-28-taxize-1000/unnamed-chunk-7-1.png) + +By Authors + + +```r +out %>% + group_by(author.login) %>% + summarise(n = n()) %>% + ggplot(aes(author.login, n)) + + geom_bar(stat = "identity", fill="#87D2A0") + + coord_flip() + + theme_grey(base_size = 20) +``` + +![](/public/img/2014-11-28-taxize-1000/unnamed-chunk-8-1.png) + +## Issues + + +```r +out <- gh_issues("taxize") +``` + +Number of issues + + +```r +NROW(out) +#> [1] 382 +``` + +Number of open issues + + +```r +out %>% + filter(state == "open") %>% + NROW +#> [1] 35 +``` + +Number of pull requests + + +```r +out %>% + filter(!is.na(pull_request.url)) %>% + NROW +#> [1] 119 +``` + +## Forks, number of + + +```r +NROW(gh_verb(repo = "taxize", verb="forks")) +#> [1] 16 +``` + +## Stars, number of + + +```r +NROW(gh_verb(repo = "taxize", verb="stargazers", args=list(per_page=100))) +#> [1] 44 +``` + +## Watchers, number of + + +```r +NROW(gh_verb(repo = "taxize", verb="subscribers", args=list(per_page=100))) +#> [1] 12 +```
    diff --git a/_site/page13/index.html b/_site/page13/index.html index a82f3b7206..35298bbaf1 100644 --- a/_site/page13/index.html +++ b/_site/page13/index.html @@ -61,567 +61,962 @@

    Recology

    - - Fun with the GitHub API + + Intro to alpha ckanr - R client for CKAN RESTful API

    -

    Recently I've had fun playing with the GitHub API, and here are some notes to self about this fun having.

    - -

    Setup

    - -

    Get/load packages

    -
    install.packages(c('devtools','jsonlite','httr','yaml'))
    -
    library("devtools")
    -library("httr")
    -library("yaml")
    -
    -

    Define a vector of package names

    -
    pkgs <- c("alm", "bmc", "bold", "clifro", "ecoengine",
    -  "elastic", "fulltext", "geonames", "gistr",
    -  "RNeXML", "rnoaa", "rnpn", "traits", "rplos", "rsnps",
    -  "rWBclimate", "solr", "spocc", "taxize", "togeojson", "treeBASE")
    -pkgs <- sort(pkgs)
    -
    -

    Define functions

    -
    github_auth <- function(appname = getOption("gh_appname"), key = getOption("gh_id"),
    -                        secret = getOption("gh_secret")) {
    -  if (is.null(getOption("gh_token"))) {
    -    myapp <- oauth_app(appname, key, secret)
    -    token <- oauth2.0_token(oauth_endpoints("github"), myapp)
    -    options(gh_token = token)
    -  } else {
    -    token <- getOption("gh_token")
    -  }
    -  return(token)
    -}
    -
    -make_url <- function(x, y, z) {
    -  sprintf("https://api.github.com/repos/%s/%s/%s", x, y, z)
    -}
    -
    -process_result <- function(x) {
    -  stop_for_status(x)
    -  if (!x$headers$`content-type` == "application/json; charset=utf-8")
    -    stop("content type mismatch")
    -  tmp <- content(x, as = "text")
    -  jsonlite::fromJSON(tmp, flatten = TRUE)
    -}
    -
    -parse_file <- function(x) {
    -  tmp <- gsub("\n\\s+", "\n", 
    -              paste(vapply(strsplit(x, "\n")[[1]], RCurl::base64Decode,
    -                           character(1), USE.NAMES = FALSE), collapse = " "))
    -  lines <- readLines(textConnection(tmp))
    -  vapply(lines, gsub, character(1), pattern = "\\s", replacement = "",
    -         USE.NAMES = FALSE)
    -}
    -
    -request <- function(owner = "ropensci", repo, file="DESCRIPTION", ...) {
    -  req <- GET(make_url(owner, repo, paste0("contents/", file)), 
    -             config = c(token = github_auth(), ...))
    -  if(req$status_code != 200) { NA } else {
    -    cts <- process_result(req)$content
    -    parse_file(cts)
    -  }
    -}
    -
    -has_term <- function(what, ...) any(grepl(what, request(...)))
    -has_file <- function(what, ...) if(all(is.na(request(file = what, ...)))) FALSE else TRUE
    -
    -

    Do stuff

    - -

    Does a package depend on a particular package? e.g., look for httr in the DESCRIPTION file (which is the default file name in request() above)

    -
    has_term("httr", repo="taxize")
    -#> [1] TRUE
    -has_term("maptools", repo="taxize")
    -#> [1] FALSE
    -
    -
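    Since has_term() just forwards its ... args on to request(), you can also grep a file other than the default DESCRIPTION. A quick sketch (not run here; the NAMESPACE target and the search term are only illustrative):

    ```r
    # hypothetical: check a different file by forwarding `file` to request()
    has_term("importFrom", repo = "taxize", file = "NAMESPACE")
    ```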

    Do a series of R packages have a CONTRIBUTING.md file explaining how to contribute?

    - -

    Yes

    -
    has_file("CONTRIBUTING.md", repo="taxize")
    -#> [1] TRUE
    -
    -

    Many packages

    -
    vapply(pkgs, function(x) has_file("CONTRIBUTING.md", repo=x), logical(1))
    -#>        alm        bmc       bold     clifro  ecoengine    elastic 
    -#>      FALSE      FALSE      FALSE      FALSE      FALSE      FALSE 
    -#>   fulltext   geonames      gistr     RNeXML      rnoaa       rnpn 
    -#>       TRUE      FALSE      FALSE       TRUE       TRUE      FALSE 
    -#>      rplos      rsnps rWBclimate       solr      spocc     taxize 
    -#>      FALSE      FALSE      FALSE      FALSE       TRUE       TRUE 
    -#>  togeojson     traits   treeBASE 
    -#>      FALSE      FALSE      FALSE
    -
    -

    Check rate limit

    - -

    Define function

    -
    rate_limit <- function(...) {
    -  token <- github_auth()
    -  req <- GET("https://api.github.com/rate_limit", config = c(token = token, ...))
    -  process_result(req)
    -}
    -
    -

    Check it

    -
    rate_limit()
    -#> $resources
    -#> $resources$core
    -#> $resources$core$limit
    -#> [1] 5000
    -#> 
    -#> $resources$core$remaining
    -#> [1] 4925
    -#> 
    -#> $resources$core$reset
    -#> [1] 1417031016
    -#> 
    -#> 
    -#> $resources$search
    -#> $resources$search$limit
    -#> [1] 30
    -#> 
    -#> $resources$search$remaining
    -#> [1] 30
    -#> 
    -#> $resources$search$reset
    -#> [1] 1417028069
    -#> 
    -#> 
    -#> 
    -#> $rate
    -#> $rate$limit
    -#> [1] 5000
    -#> 
    -#> $rate$remaining
    -#> [1] 4925
    -#> 
    -#> $rate$reset
    -#> [1] 1417031016
    -
    -

    Convert time to reset to human readable form

    -
    as.POSIXct(rate_limit()$rate$reset, origin="1970-01-01")
    -#> [1] "2014-11-26 11:43:36 PST"
    -
    + Recently I had need to create a client for scraping museum metadata to help out some folks that use that kind of data. It's called [musemeta](https://github.com/ropensci/musemeta). One of the data sources in that package uses the open source _data portal software_ [CKAN](http://ckan.org/), and so we can interact with [the CKAN API](http://docs.ckan.org/en/latest/api/index.html) to get data. Since many groups can use CKAN API/etc infrastucture because it's open source, I thought why not have a general purpose R client for this, since [there are other clients](https://github.com/ckan/ckan/wiki/CKAN-API-Clients) for Python, PHP, Ruby, etc. + +Here's a bit of an intro: + +## Setup + +Get/load packages + + +```r +install.packages("devtools") +devtools::install_github("ropensci/ckanr") +``` + + +```r +library("ckanr") +``` + +> Note: the default URL is for http://data.techno-science.ca/. You can change that in the `url` parameter + +## Some package details + +All API alls are `POST` requests, and handled through a helper function called `ckan_POST()`. See `ckanr:::ckan_POST` to see the function. + +There are some common parameters across most functions that are worth describing a bit: + +* `offset` (numeric) Where to start getting items from (optional, default: 0) +* `limit` (numeric) The maximum number of items to return (optional, default: 31) +* `url` Base url to use. Default: [http://data.techno-science.ca](http://data.techno-science.ca) +* `as` (character) One of `list` (default), `table`, or `json`. Parsing with table option uses `jsonlite::fromJSON(..., simplifyDataFrame = TRUE)`, which attempts to parse data to `data.frame`'s when possible, so the result can vary. +* `...` Curl args passed on to `httr::POST` + +## Changes + + +```r +changes(limit = 2, as = "table") +#> user_id timestamp +#> 1 b50449ea-1dcc-4d52-b620-fc95bf56034b 2014-11-06T18:58:08.001743 +#> 2 b50449ea-1dcc-4d52-b620-fc95bf56034b 2014-11-06T18:55:55.059527 +#> object_id +#> 1 cc6a523c-cecf-4a95-836b-295a11ce2bce +#> 2 cc6a523c-cecf-4a95-836b-295a11ce2bce +#> revision_id data.package.maintainer +#> 1 5d11079e-fc05-4121-9fd5-fe086f5e5f33 +#> 2 4a591538-0584-487b-8ed1-3260d1d09d77 +#> data.package.name data.package.metadata_modified data.package.author +#> 1 test 2014-11-06T18:55:54.772675 +#> 2 test 2014-11-06T18:55:54.772675 +#> data.package.url data.package.notes data.package.owner_org +#> 1 fafa260d-e2bf-46cd-9c35-34c1dfa46c57 +#> 2 fafa260d-e2bf-46cd-9c35-34c1dfa46c57 +#> data.package.private data.package.maintainer_email +#> 1 FALSE +#> 2 FALSE +#> data.package.author_email data.package.state data.package.version +#> 1 deleted +#> 2 active +#> data.package.creator_user_id +#> 1 b50449ea-1dcc-4d52-b620-fc95bf56034b +#> 2 b50449ea-1dcc-4d52-b620-fc95bf56034b +#> data.package.id data.package.title +#> 1 cc6a523c-cecf-4a95-836b-295a11ce2bce test +#> 2 cc6a523c-cecf-4a95-836b-295a11ce2bce test +#> data.package.revision_id data.package.type +#> 1 5d11079e-fc05-4121-9fd5-fe086f5e5f33 dataset +#> 2 4a591538-0584-487b-8ed1-3260d1d09d77 dataset +#> data.package.license_id id +#> 1 notspecified 59c308c8-68b2-4b92-bc57-129378d31882 +#> 2 notspecified a8577e2c-f742-49c2-bef3-ca3299e58704 +#> activity_type +#> 1 deleted package +#> 2 changed package +``` + +## List datasets + + +```r +datasets(as = "table") +#> [1] "artifact-data-agriculture" +#> [2] "artifact-data-aviation" +#> [3] "artifact-data-bookbinding" +#> [4] "artifact-data-chemistry" +#> [5] "artifact-data-communications" +#> [6] 
"artifact-data-computing-technology" +#> [7] "artifact-data-domestic-technology" +#> [8] "artifact-data-energy-electric" +#> [9] "artifact-data-exploration-and-survey" +#> [10] "artifact-data-fisheries" +#> [11] "artifact-data-forestry" +#> [12] "artifact-data-horology" +#> [13] "artifact-data-industrial-technology" +#> [14] "artifact-data-lighting-technology" +#> [15] "artifact-data-location-canada-agriculture-and-food-museum" +#> [16] "artifact-data-location-canada-aviation-and-space-museum" +#> [17] "artifact-data-location-canada-science-and-technology-museum" +#> [18] "artifact-data-marine-transportation" +#> [19] "artifact-data-mathematics" +#> [20] "artifact-data-medical-technology" +#> [21] "artifact-data-meteorology" +#> [22] "artifact-data-metrology" +#> [23] "artifact-data-mining-and-metallurgy" +#> [24] "artifact-data-motorized-ground-transportation" +#> [25] "artifact-data-non-motorized-ground-transportation" +#> [26] "artifact-data-on-loan" +#> [27] "artifact-data-photography" +#> [28] "artifact-data-physics" +#> [29] "artifact-data-printing" +#> [30] "artifact-data-railway-transportation" +#> [31] "artifact-dataset-fire-fighting" +``` + +## List tags + + +```r +tag_list('aviation', as='table') +#> vocabulary_id display_name +#> 1 NA Aviation +#> 2 NA Canada Aviation and Space Museum +#> id name +#> 1 cc1db2db-b08b-4888-897f-a17eade2461b Aviation +#> 2 8d05a650-bc7b-4b89-bcc8-c10177e60119 Canada Aviation and Space Museum +``` + +## Show tags + +Subset for readme brevity + + +```r +tag_show('Aviation')$packages[[1]][1:3] +#> $owner_org +#> [1] "fafa260d-e2bf-46cd-9c35-34c1dfa46c57" +#> +#> $maintainer +#> [1] "" +#> +#> $relationships_as_object +#> list() +``` + +## List groups + + +```r +group_list(as='table') +#> display_name description +#> 1 Communications +#> 2 Domestic and Industrial Technology +#> 3 Everything +#> 4 Location +#> 5 Resources +#> 6 Scientific Instrumentation +#> 7 Transportation +#> title +#> 1 Communications +#> 2 Domestic and Industrial Technology +#> 3 Everything +#> 4 Location +#> 5 Resources +#> 6 Scientific Instrumentation +#> 7 Transportation +#> image_display_url +#> 1 http://data.techno-science.ca/uploads/group/20141024-162305.6896412comm.jpg +#> 2 http://data.techno-science.ca/uploads/group/20141024-162324.3636615domtech.jpg +#> 3 http://data.techno-science.ca/uploads/group/20141024-162448.0656596everything.jpg +#> 4 http://data.techno-science.ca/uploads/group/20141024-162528.8786547location.jpg +#> 5 http://data.techno-science.ca/uploads/group/20141024-162608.3732604resour.jpg +#> 6 http://data.techno-science.ca/uploads/group/20141024-162549.1925831sciinst.jpg +#> 7 http://data.techno-science.ca/uploads/group/20141024-162624.1872823transport.jpg +#> approval_status is_organization state +#> 1 approved FALSE active +#> 2 approved FALSE active +#> 3 approved FALSE active +#> 4 approved FALSE active +#> 5 approved FALSE active +#> 6 approved FALSE active +#> 7 approved FALSE active +#> image_url +#> 1 20141024-162305.6896412comm.jpg +#> 2 20141024-162324.3636615domtech.jpg +#> 3 20141024-162448.0656596everything.jpg +#> 4 20141024-162528.8786547location.jpg +#> 5 20141024-162608.3732604resour.jpg +#> 6 20141024-162549.1925831sciinst.jpg +#> 7 20141024-162624.1872823transport.jpg +#> revision_id packages type +#> 1 cc302424-2e68-4fcc-9a3a-6de60748c2e4 5 group +#> 2 b7d95b87-5999-45f9-8775-c64094842551 2 group +#> 3 c2f0c59a-a543-4d67-a61f-4f387068ba53 1 group +#> 4 6816d571-d2bd-4131-b99d-80e7e6797492 4 group +#> 5 
e37ee30d-577b-4349-8f0e-eaa4543497e8 6 group +#> 6 74eba42e-08b3-4400-b40f-3d6159ae6e9d 10 group +#> 7 a6cc4aab-eae9-42ba-9ab4-cbf45d5c6a0e 7 group +#> id name +#> 1 5268ce18-e3b8-4802-b29e-30740b46e52d communications +#> 2 5a9a8095-9e0c-485e-84f6-77f577607991 domestic-and-industrial-technology +#> 3 d7dd233e-a1cc-43da-8152-f7ed15d26756 everything +#> 4 770fc9c0-d4f3-48b0-a4ee-e00c6882df1d location +#> 5 f6c205de-cc95-4308-ac9f-5a63f1a5c7ee resources +#> 6 b98ff457-2031-48b6-b681-9adb3afc501b scientific-instrumentation +#> 7 a73bf7be-310d-472e-83e1-43a3d87602ba transportation +``` + +## Show groups + +Subset for readme brevity + + +```r +group_show('communications', as='table')$users +#> openid about capacity name created +#> 1 NA admin marc 2014-10-24T14:44:29.885262 +#> 2 NA admin sepandar 2014-10-23T19:40:42.056418 +#> email_hash sysadmin +#> 1 a32002c960476614370a16e9fb81f436 FALSE +#> 2 10b930a228afd1da2647d62e70b71bf8 TRUE +#> activity_streams_email_notifications state number_of_edits +#> 1 FALSE active 376 +#> 2 FALSE active 44 +#> number_administered_packages display_name fullname +#> 1 39 marc +#> 2 1 sepandar +#> id +#> 1 27778230-2e90-4818-9f00-bbf778c8fa09 +#> 2 b50449ea-1dcc-4d52-b620-fc95bf56034b +``` + +## Show a package + + +```r +package_show('34d60b13-1fd5-430e-b0ec-c8bc7f4841cf', as='table')$resources +#> resource_group_id cache_last_updated +#> 1 ea8533d9-cdc6-4e0e-97b9-894e06d50b92 NA +#> 2 ea8533d9-cdc6-4e0e-97b9-894e06d50b92 NA +#> 3 ea8533d9-cdc6-4e0e-97b9-894e06d50b92 NA +#> 4 ea8533d9-cdc6-4e0e-97b9-894e06d50b92 NA +#> revision_timestamp webstore_last_updated +#> 1 2014-10-28T18:13:22.213530 NA +#> 2 2014-11-04T02:59:50.567068 NA +#> 3 2014-11-05T21:23:58.533397 NA +#> 4 2014-11-05T21:25:16.848423 NA +#> id size state hash +#> 1 be2b0af8-24a8-4a55-8b30-89f5459b713a NA active +#> 2 7d65910e-4bdc-4f06-a213-e24e36762767 NA active +#> 3 97622ad7-1507-4f6a-8acb-14e826447389 NA active +#> 4 7a72498a-c49c-4e84-8b10-58991de10df6 NA active +#> description format +#> 1 XML Dataset XML +#> 2 Data dictionary for CSTMC artifact datasets. XLS +#> 3 Tips for using the artifacts datasets. .php +#> 4 Tips for using the artifacts datasets. 
.php +#> tracking_summary.total tracking_summary.recent mimetype_inner url_type +#> 1 0 0 NA NA +#> 2 0 0 NA NA +#> 3 0 0 NA NA +#> 4 0 0 NA NA +#> mimetype cache_url name +#> 1 NA NA Artifact Data - Vacuum Tubes (XML) +#> 2 NA NA Data Dictionary +#> 3 NA NA Tips (English) +#> 4 NA NA Tips (French) +#> created +#> 1 2014-10-28T18:13:22.240393 +#> 2 2014-11-04T02:59:50.643658 +#> 3 2014-11-04T18:14:23.952937 +#> 4 2014-11-05T21:25:16.887796 +#> url +#> 1 http://source.techno-science.ca/datasets-donn%C3%A9es/artifacts-artefacts/groups-groupes/vacuum-tubes-tubes-electronique.xml +#> 2 http://source.techno-science.ca/datasets-donn%C3%A9es/artifacts-artefacts/cstmc-artifact-data-dictionary-dictionnaire-de-donnees-artefacts-smstc.xls +#> 3 http://techno-science.ca/en/open-data/tips-using-artifact-open-data-set.php +#> 4 http://techno-science.ca/fr/donnees-ouvertes/conseils-donnees-ouvertes-artefacts.php +#> webstore_url last_modified position revision_id +#> 1 NA NA 0 9a27d884-f181-4842-ab47-cda35a8bf99a +#> 2 NA NA 1 5d27b3e6-7870-4c12-a122-9e9f5adee4a0 +#> 3 NA NA 2 40993f16-402b-439c-9288-2f2b177e4b8f +#> 4 NA NA 3 57f1488e-a140-4eb6-9329-fc13202a73af +#> resource_type +#> 1 NA +#> 2 NA +#> 3 NA +#> 4 NA +``` + +## Search for packages + + +```r +out <- package_search(q = '*:*', rows = 2, as="table")$results +out[, !names(out) %in% 'resources'] +#> license_title maintainer relationships_as_object +#> 1 Open Government Licence - Canada NULL +#> 2 Open Government Licence - Canada NULL +#> private maintainer_email revision_timestamp +#> 1 FALSE 2014-11-05T23:17:46.220002 +#> 2 FALSE 2014-11-05T23:17:04.923594 +#> id metadata_created +#> 1 35d5484d-38ce-495e-8722-7857c4fd17bf 2014-10-28T20:13:11.572558 +#> 2 da65507d-b018-4d3b-bde3-5419cf29d144 2014-10-28T14:59:21.386177 +#> metadata_modified author author_email state version +#> 1 2014-11-05T23:17:46.220657 active +#> 2 2014-11-05T23:17:04.924229 active +#> creator_user_id type num_resources +#> 1 27778230-2e90-4818-9f00-bbf778c8fa09 dataset 4 +#> 2 27778230-2e90-4818-9f00-bbf778c8fa09 dataset 4 +#> tags +#> 1 NA, Location, Location, 2014-10-28T20:13:11.572558, active, da88c5a2-3766-41ea-a75b-9c87047cc528 +#> 2 NA, Computing Technology, Computing Technology, 2014-10-28T14:59:21.386177, active, 5371dc28-9ce8-4f21-9afb-1f155f132bfe +#> tracking_summary.total tracking_summary.recent +#> 1 35 10 +#> 2 24 8 +#> groups +#> 1 Location, , http://data.techno-science.ca/uploads/group/20141024-162528.8786547location.jpg, Location, 770fc9c0-d4f3-48b0-a4ee-e00c6882df1d, location +#> 2 Scientific Instrumentation, , http://data.techno-science.ca/uploads/group/20141024-162549.1925831sciinst.jpg, Scientific Instrumentation, b98ff457-2031-48b6-b681-9adb3afc501b, scientific-instrumentation +#> license_id relationships_as_subject num_tags organization.description +#> 1 ca-ogl-lgo NULL 1 +#> 2 ca-ogl-lgo NULL 1 +#> organization.created organization.title organization.name +#> 1 2014-10-24T14:49:36.878579 CSTMC cstmc +#> 2 2014-10-24T14:49:36.878579 CSTMC cstmc +#> organization.revision_timestamp organization.is_organization +#> 1 2014-10-24T14:49:36.813670 TRUE +#> 2 2014-10-24T14:49:36.813670 TRUE +#> organization.state organization.image_url +#> 1 active +#> 2 active +#> organization.revision_id organization.type +#> 1 7a325a56-46f1-419c-b7b2-ec7501edb35a organization +#> 2 7a325a56-46f1-419c-b7b2-ec7501edb35a organization +#> organization.id organization.approval_status +#> 1 fafa260d-e2bf-46cd-9c35-34c1dfa46c57 approved +#> 2 fafa260d-e2bf-46cd-9c35-34c1dfa46c57 
approved +#> name isopen url +#> 1 artifact-data-location-canada-science-and-technology-museum FALSE +#> 2 artifact-data-computing-technology FALSE +#> notes +#> 1 This dataset includes artifacts in the collection of the Canada Science and Technology Museums Corporation that are currently in the Canada Science and Technology Museum. +#> 2 This dataset includes artifacts in the collection of the Canada Science and Technology Museums Corporation related to computing technology. +#> owner_org extras +#> 1 fafa260d-e2bf-46cd-9c35-34c1dfa46c57 NULL +#> 2 fafa260d-e2bf-46cd-9c35-34c1dfa46c57 NULL +#> license_url +#> 1 http://data.gc.ca/eng/open-government-licence-canada +#> 2 http://data.gc.ca/eng/open-government-licence-canada +#> title +#> 1 Artifact Data - Location - Canada Science and Technology Museum +#> 2 Artifact Data - Computing Technology +#> revision_id +#> 1 694a977a-c238-47a4-8671-caddca4edfca +#> 2 858cb240-76a0-406a-800c-e4ae6cc56ab9 +``` + +## Search for resources + + +```r +resource_search(q = 'name:data', limit = 2, as='table') +#> $count +#> [1] 71 +#> +#> $results +#> resource_group_id cache_last_updated +#> 1 01a82e52-01bf-4a9c-9b45-c4f9b92529fa NA +#> 2 01a82e52-01bf-4a9c-9b45-c4f9b92529fa NA +#> webstore_last_updated id size state +#> 1 NA e179e910-27fb-44f4-a627-99822af49ffa NA active +#> 2 NA ba84e8b7-b388-4d2a-873a-7b107eb7f135 NA active +#> last_modified hash description format +#> 1 NA XML Dataset XML +#> 2 NA Data dictionary for CSTMC artifact datasets. XLS +#> mimetype_inner url_type mimetype cache_url +#> 1 NA NA NA NA +#> 2 NA NA NA NA +#> name created +#> 1 Artifact Data - Exploration and Survey (XML) 2014-10-28T15:50:35.374303 +#> 2 Data Dictionary 2014-11-03T18:01:02.094210 +#> url +#> 1 http://source.techno-science.ca/datasets-donn%C3%A9es/artifacts-artefacts/groups-groupes/exploration-and-survey-exploration-et-leve.xml +#> 2 http://source.techno-science.ca/datasets-donn%C3%A9es/artifacts-artefacts/cstmc-artifact-data-dictionary-dictionnaire-de-donnees-artefacts-smstc.xls +#> webstore_url position revision_id resource_type +#> 1 NA 0 a22e6741-3e89-4db0-a802-ba594b1c1fad NA +#> 2 NA 1 da1f8585-521d-47ef-8ead-7832474a3421 NA +``` + +## Future work + +* There's already [an issue](https://github.com/ropensci/ckanr/issues/4) to add support for [DataStore](http://docs.ckan.org/en/latest/maintaining/datastore.html) +* This client needs to be tested against many other CKAN API instances to make sure it's robust +* Add a test suite +* Use cases: would be nice to include in the package documentation use cases +* Other things? Get in touch on twitter `@recology_` or below +

    - - sofa - reboot + + Fun with the GitHub API

    - + + + Recently I've had fun playing with the GitHub API, and here are some notes to self about this fun having. + +## Setup + +Get/load packages + + +```r +install.packages(c('devtools','jsonlite','httr','yaml')) +``` + + +```r +library("devtools") +library("httr") +library("yaml") +``` -

    I've reworked sofa recently after someone reported a bug in the package. Since the last post on this package on 2013-06-21, there's a bunch of changes:

    - -
      -
    • Removed the sofa_ prefix from all functions as it wasn't really necessary.
    • -
    • Replaced rjson/RJSONIO with jsonlite for JSON I/O.
    • -
    • New functions: - -
        -
      • revisions() - to get the revision numbers for a document.
      • -
      • uuids() - get any number of UUIDs - e.g., if you want to set document IDs with UUIDs (see the short sketch just after this list)
      • -
    • -
    • Most functions that deal with documents are prefixed with doc_
    • -
    • Functions that deal with databases are prefixed with db_
    • -
    • Simplified all code, reducing duplication
    • -
    • All functions take cushion as the first parameter, for consistency's sake.
    • -
    • Changed cushion() function so that you can only register one cushion with each function call, and the function takes parameters for each element now, name (name of the cushion, whatever you want), user (user name, if applicable), pwd (password, if applicable), type (one of localhost, cloudant, or iriscouch), and port (if applicable).
    • -
    • Changed package license from CC0 to MIT
    • -
    - -
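    As a teaser for the new functions, here is roughly how uuids() could pair with doc_create() to set your own document ID (as mentioned in the list above). This is a sketch only: I'm assuming uuids() takes a count argument and returns the IDs in a uuids element, so check the function's docs for the real signature.

    ```r
    # assumed interface: ask the server for one UUID, returned in a `uuids` element
    id <- uuids(1)$uuids[[1]]
    # use it as the document ID; doc_create() is demonstrated further down
    doc_create(dbname = "sofadb", doc = '{"name":"sofa","beer":"stout"}', docid = id)
    ```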

    There's still more to do, but I'm pretty happy with the recent changes, and I hope at least some find the package useful. Also, would love people to try it out as all bugs are shallow and all that...

    - -

    The following are a few examples of package use.

    - -

    Install CouchDB

    - -

    Instructions here

    - -

    Start CouchDB

    - -

    In your terminal

    -
    couchdb
    -
    -

    You can interact with your CouchDB databases as well in your browser. Navigate to http://localhost:5984/_utils

    - -

    Install sofa

    -
    install.packages("devtools")
    -devtools::install_github("sckott/sofa")
    -
    library('sofa')
    -
    -

    Authenticate - Cushions

    - -

    As an example, here's how I set up details for connecting to my Cloudant couch:

    -
    cushion(name = 'cloudant', user = '<user name>', pwd = '<password>', type = "cloudant")
    -
    -

    By default there is a built-in cushion for localhost so you don't have to do that, unless you want to change those details, e.g., the port number. Right now cushions aren't preserved across R sessions, but I'm working on that.

    - -
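    If you do want different localhost details, say a non-default port, re-registering the localhost cushion should do it. A minimal sketch, assuming the same arguments shown above also apply to localhost (the port value here is made up):

    ```r
    # hypothetical values: point the localhost cushion at a non-default port
    cushion(name = 'localhost', type = "localhost", port = 5985)
    ```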

    For example, I'll lay down a cushion for Cloudant, then I can call cushions() to see my cushions:

    -
    cushion(name = 'cloudant', user = '<user name>', pwd = '<pwd>', type = "cloudant")
    -cushions()
    -
    -

    By default, if you don't provide a cushion name, you are using localhost.

    - -

    Ping the server

    -
    ping()
    -#> $couchdb
    -#> [1] "Welcome"
    -#> 
    -#> $uuid
    -#> [1] "2c10f0c6d9bd17205b692ae93cd4cf1d"
    -#> 
    -#> $version
    -#> [1] "1.6.0"
    -#> 
    -#> $vendor
    -#> $vendor$version
    -#> [1] "1.6.0-1"
    -#> 
    -#> $vendor$name
    -#> [1] "Homebrew"
    -
    -

    Nice, it's working.

    - -

    Create a new database, and list available databases

    -
    db_create(dbname='sofadb')
    -#> $ok
    -#> [1] TRUE
    -
    -

    see if it's there now

    -
    db_list()
    -#>  [1] "_replicator" "_users"      "alm_couchdb" "cachecall"   "hello_earth"
    -#>  [6] "leothelion"  "mran"        "mydb"        "newdbs"      "sofadb"
    -
    -

    Create documents

    - -

    Create a document WITH a name (uses PUT)

    -
    doc1 <- '{"name":"sofa","beer":"IPA"}'
    -doc_create(dbname="sofadb", doc=doc1, docid="a_beer")
    -#> $ok
    -#> [1] TRUE
    -#> 
    -#> $id
    -#> [1] "a_beer"
    -#> 
    -#> $rev
    -#> [1] "1-a48c98c945bcc05d482bc6f938c89882"
    -
    -

    Create a document WITHOUT a name (uses POST)

    -
    doc2 <- '{"name":"sofa","icecream":"rocky road"}'
    -doc_create(dbname="sofadb", doc=doc2)
    -#> $ok
    -#> [1] TRUE
    -#> 
    -#> $id
    -#> [1] "c5c5c332c25cf62cc584647a81006f6d"
    -#> 
    -#> $rev
    -#> [1] "1-fd0da7fcb8d3afbfc5757d065c92362c"
    -
    -

    List documents

    - -

    List them

    -
    alldocs(dbname="sofadb")
    -#>                                 id                              key
    -#> 1                           a_beer                           a_beer
    -#> 2 c5c5c332c25cf62cc584647a81006f6d c5c5c332c25cf62cc584647a81006f6d
    -#>                                  rev
    -#> 1 1-a48c98c945bcc05d482bc6f938c89882
    -#> 2 1-fd0da7fcb8d3afbfc5757d065c92362c
    -
    -

    Optionally include the documents themselves; they are returned as a list by default, since it would be hard to parse an endless variety of document formats.

    -
    alldocs(dbname="sofadb", include_docs = TRUE)
    -#> $total_rows
    -#> [1] 2
    -#> 
    -#> $offset
    -#> [1] 0
    -#> 
    -#> $rows
    -#> $rows[[1]]
    -#> $rows[[1]]$id
    -#> [1] "a_beer"
    -#> 
    -#> $rows[[1]]$key
    -#> [1] "a_beer"
    -#> 
    -#> $rows[[1]]$value
    -#> $rows[[1]]$value$rev
    -#> [1] "1-a48c98c945bcc05d482bc6f938c89882"
    -#> 
    -#> 
    -#> $rows[[1]]$doc
    -#> $rows[[1]]$doc$`_id`
    -#> [1] "a_beer"
    -#> 
    -#> $rows[[1]]$doc$`_rev`
    -#> [1] "1-a48c98c945bcc05d482bc6f938c89882"
    -#> 
    -#> $rows[[1]]$doc$name
    -#> [1] "sofa"
    -#> 
    -#> $rows[[1]]$doc$beer
    -#> [1] "IPA"
    -#> 
    -#> 
    -#> 
    -#> $rows[[2]]
    -#> $rows[[2]]$id
    -#> [1] "c5c5c332c25cf62cc584647a81006f6d"
    -#> 
    -#> $rows[[2]]$key
    -#> [1] "c5c5c332c25cf62cc584647a81006f6d"
    -#> 
    -#> $rows[[2]]$value
    -#> $rows[[2]]$value$rev
    -#> [1] "1-fd0da7fcb8d3afbfc5757d065c92362c"
    -#> 
    -#> 
    -#> $rows[[2]]$doc
    -#> $rows[[2]]$doc$`_id`
    -#> [1] "c5c5c332c25cf62cc584647a81006f6d"
    -#> 
    -#> $rows[[2]]$doc$`_rev`
    -#> [1] "1-fd0da7fcb8d3afbfc5757d065c92362c"
    -#> 
    -#> $rows[[2]]$doc$name
    -#> [1] "sofa"
    -#> 
    -#> $rows[[2]]$doc$icecream
    -#> [1] "rocky road"
    -
    -

    Update a document

    - -

    Change IPA (india pale ale) to IPL (india pale lager). We need to get the revisions first, as the revision number must be included when updating a document.

    -
    (revs <- revisions(dbname = "sofadb", docid = "a_beer"))
    -#> [1] "1-a48c98c945bcc05d482bc6f938c89882"
    -
    newdoc <- '{"name":"sofa","beer":"IPL"}'
    -doc_update(dbname = "sofadb", doc = newdoc, docid = "a_beer", rev = revs[1])
    -#> $ok
    -#> [1] TRUE
    -#> 
    -#> $id
    -#> [1] "a_beer"
    -#> 
    -#> $rev
    -#> [1] "2-f2390eb18b8f9a870c915c6712a7f65e"
    -
    -

    Should be two revisions now

    -
    revisions(dbname = "sofadb", docid = "a_beer")
    -#> [1] "2-f2390eb18b8f9a870c915c6712a7f65e"
    -#> [2] "1-a48c98c945bcc05d482bc6f938c89882"
    -
    -

    Get headers for a document

    -
    doc_head(dbname = "sofadb", docid = "a_beer")
    -#> [[1]]
    -#> [[1]]$status
    -#> [1] 200
    -#> 
    -#> [[1]]$version
    -#> [1] "HTTP/1.1"
    -#> 
    -#> [[1]]$headers
    -#> $server
    -#> [1] "CouchDB/1.6.0 (Erlang OTP/17)"
    -#> 
    -#> $etag
    -#> [1] "\"2-f2390eb18b8f9a870c915c6712a7f65e\""
    -#> 
    -#> $date
    -#> [1] "Tue, 18 Nov 2014 21:19:16 GMT"
    -#> 
    -#> $`content-type`
    -#> [1] "application/json"
    -#> 
    -#> $`content-length`
    -#> [1] "88"
    -#> 
    -#> $`cache-control`
    -#> [1] "must-revalidate"
    -#> 
    -#> attr(,"class")
    -#> [1] "insensitive" "list"
    -
    -

    JSON vs. list

    - -

    Across all/most functions you can request json or list as output with the as parameter.

    -
    db_list(as = "list")
    -#>  [1] "_replicator" "_users"      "alm_couchdb" "cachecall"   "hello_earth"
    -#>  [6] "leothelion"  "mran"        "mydb"        "newdbs"      "sofadb"
    -
    db_list(as = "json")
    -#> [1] "[\"_replicator\",\"_users\",\"alm_couchdb\",\"cachecall\",\"hello_earth\",\"leothelion\",\"mran\",\"mydb\",\"newdbs\",\"sofadb\"]\n"
    -
    -

    Curl options

    - -

    Across all functions you can pass in curl options. We're using httr internally, so you can use httr helper functions to make some curl options easier. Examples:

    - -

    Verbose output

    -
    library("httr")
    -db_list(config=verbose())
    -#>  [1] "_replicator" "_users"      "alm_couchdb" "cachecall"   "hello_earth"
    -#>  [6] "leothelion"  "mran"        "mydb"        "newdbs"      "sofadb"
    -
    -

    Progress

    -
    db_list(config=progress())
    -#> 
    -  |                                                                       
    -  |                                                                 |   0%
    -  |                                                                       
    -  |=================================================================| 100%
    -#>  [1] "_replicator" "_users"      "alm_couchdb" "cachecall"   "hello_earth"
    -#>  [6] "leothelion"  "mran"        "mydb"        "newdbs"      "sofadb"
    -
    -

    Set a timeout

    -
    db_list(config=timeout(seconds = 0.001))
    -#> 
    -#> Error in function (type, msg, asError = TRUE)  : 
    -#>    Operation timed out after 3 milliseconds with 0 out of -1 bytes received
    -
    -

    Full text search

    - -

    I'm working on an R client for Elasticsearch called elastic - find it at https://github.com/ropensci/elastic

    - -

    Thinking about where to include functions to allow elastic and sofa to work together...if you have any thoughts hit up the issues. I'll probably include helper functions for CouchDB search in the elastic package, interfacing with the CouchDB plugin for Elasticsearch.

    +## Define a vector of package names + + +```r +pkgs <- c("alm", "bmc", "bold", "clifro", "ecoengine", + "elastic", "fulltext", "geonames", "gistr", + "RNeXML", "rnoaa", "rnpn", "traits", "rplos", "rsnps", + "rWBclimate", "solr", "spocc", "taxize", "togeojson", "treeBASE") +pkgs <- sort(pkgs) +``` + +## Define functions + + +```r +github_auth <- function(appname = getOption("gh_appname"), key = getOption("gh_id"), + secret = getOption("gh_secret")) { + if (is.null(getOption("gh_token"))) { + myapp <- oauth_app(appname, key, secret) + token <- oauth2.0_token(oauth_endpoints("github"), myapp) + options(gh_token = token) + } else { + token <- getOption("gh_token") + } + return(token) +} + +make_url <- function(x, y, z) { + sprintf("https://api.github.com/repos/%s/%s/%s", x, y, z) +} + +process_result <- function(x) { + stop_for_status(x) + if (!x$headers$`content-type` == "application/json; charset=utf-8") + stop("content type mismatch") + tmp <- content(x, as = "text") + jsonlite::fromJSON(tmp, flatten = TRUE) +} + +parse_file <- function(x) { + tmp <- gsub("\n\\s+", "\n", + paste(vapply(strsplit(x, "\n")[[1]], RCurl::base64Decode, + character(1), USE.NAMES = FALSE), collapse = " ")) + lines <- readLines(textConnection(tmp)) + vapply(lines, gsub, character(1), pattern = "\\s", replacement = "", + USE.NAMES = FALSE) +} + +request <- function(owner = "ropensci", repo, file="DESCRIPTION", ...) { + req <- GET(make_url(owner, repo, paste0("contents/", file)), + config = c(token = github_auth(), ...)) + if(req$status_code != 200) { NA } else { + cts <- process_result(req)$content + parse_file(cts) + } +} + +has_term <- function(what, ...) any(grepl(what, request(...))) +has_file <- function(what, ...) if(all(is.na(request(file = what, ...)))) FALSE else TRUE +``` + +## Do stuff + +Does a package depend on a particular package? e.g., look for `httr` in the `DESCRIPTION` file (which is the default file name in `request()` above) + + +```r +has_term("httr", repo="taxize") +#> [1] TRUE +has_term("maptools", repo="taxize") +#> [1] FALSE +``` + +Do a series of R packages have a file for how to contribute `CONTRIBUTING.md`? + +Yes + + +```r +has_file("CONTRIBUTING.md", repo="taxize") +#> [1] TRUE +``` + +Many packages + + +```r +vapply(pkgs, function(x) has_file("CONTRIBUTING.md", repo=x), logical(1)) +#> alm bmc bold clifro ecoengine elastic +#> FALSE FALSE FALSE FALSE FALSE FALSE +#> fulltext geonames gistr RNeXML rnoaa rnpn +#> TRUE FALSE FALSE TRUE TRUE FALSE +#> rplos rsnps rWBclimate solr spocc taxize +#> FALSE FALSE FALSE FALSE TRUE TRUE +#> togeojson traits treeBASE +#> FALSE FALSE FALSE +``` + +## Check rate limit + +Define function + + +```r +rate_limit <- function(...) { + token <- github_auth() + req <- GET("https://api.github.com/rate_limit", config = c(token = token, ...)) + process_result(req) +} +``` + +Check it + + +```r +rate_limit() +#> $resources +#> $resources$core +#> $resources$core$limit +#> [1] 5000 +#> +#> $resources$core$remaining +#> [1] 4925 +#> +#> $resources$core$reset +#> [1] 1417031016 +#> +#> +#> $resources$search +#> $resources$search$limit +#> [1] 30 +#> +#> $resources$search$remaining +#> [1] 30 +#> +#> $resources$search$reset +#> [1] 1417028069 +#> +#> +#> +#> $rate +#> $rate$limit +#> [1] 5000 +#> +#> $rate$remaining +#> [1] 4925 +#> +#> $rate$reset +#> [1] 1417031016 +``` + +Convert time to reset to human readable form + + +```r +as.POSIXct(rate_limit()$rate$reset, origin="1970-01-01") +#> [1] "2014-11-26 11:43:36 PST" +```

    - - Conditionality meta-analysis data + + sofa - reboot

    - - -

    The paper

    - -

    One paper from my graduate work asked most generally ~ "How much does the variation in magnitudes and signs of species interaction outcomes vary?". More specifically, we wanted to know if variation differed among species interaction classes (mutualism, competition, predation), and among various "gradients" (space, time, etc.). To answer this question, we used a meta-analysis approach (rather than e.g., a field experiment). We published the paper recently.

    - -
    -

    p.s. I really, really wish we had put it in an open access journal...

    -
    - -

    The data

    - -

    Anyway, I'm here to talk about the data. We didn't get the data up with the paper, but it is up on Figshare now. The files there are the following:

    - -
      -
    • coniditionality.R - script used to process the data from variables_prelim.csv
    • -
    • variables_prelim.csv - description of variables in the preliminary data set, matches conditionality_data_prelim.csv
    • -
    • variables_used.csv - description of variables in the used data set, matches conditionality_data_used.csv
    • -
    • conditionality_data_prelim.csv - preliminary data, the raw data
    • -
    • conditionality_data_used.csv - the data used for our paper
    • -
    • README.md - the readme
    • -
    • paper_selection.csv - the list of papers we went through, with remarks about paper selection
    • -
    - -

    Please do play with the data, publish some papers, etc, etc. It took 6 of us about 4 years to collect this data; we skimmed through ~11,000 papers on the first pass (aka. skimming through abstracts in Google Scholar and Web of Science), then decided on nearly 500 papers to get data from, and narrowed down to 247 papers for the publication mentioned above. Now, there was no funding for this, so it was sort of done in between other projects, but still, it was simply A LOT of tables to digitize, and graphs to extract data points from. Anyway, hopefully you will find this data useful :p

    - -

    EML

    - -

    I think this dataset would be a great introduction to the potential power of EML (Ecological Metadata Language). At rOpenSci, one of our team, Carl Boettiger, along with Claas-Thido Pfaff, Duncan Temple Lang, Karthik Ram, and Matt Jones, has created an R client for EML, to parse EML files and to create and publish them.

    - -

    What is EML?/Why EML?

    - -

    A demonstration is in order...

    - -

    Example using EML with this dataset

    - -

    Install EML

    -
    library("devtools")
    -install.packages("RHTMLForms", repos = "http://www.omegahat.org/R/", type="source")
    -install_github("ropensci/EML", build=FALSE, dependencies=c("DEPENDS", "IMPORTS"))
    -
    -

    Load EML

    -
    library('EML')
    -
    -

    Prepare metadata

    -
    # dataset
    -prelim_dat <- read.csv("conditionality_data_prelim.csv")
    -# variable descriptions for each column
    -prelim_vars <- read.csv("variables_prelim.csv", stringsAsFactors = FALSE)
    -
    -

    Get column definitions in a vector

    -
    col_defs <- prelim_vars$description
    -
    -

    Create unit definitions for each column

    -
    unit_defs <- list(
    -  c(unit = "number",
    -    bounds = c(0, Inf)),
    -  c(unit = "number",
    -    bounds = c(0, Inf)),
    -  "independent replicates",
    -  c(unit = "number",
    -    bounds = c(0, Inf)),
    -
    -  ... <CUTOFF>
    -)
    -
    -

    Write an EML file

    -
    eml_write(prelim_dat,
    -          unit.defs = unit_defs,
    -          col.defs = col_defs,
    -          creator = "Scott Chamberlain",
    -          contact = "myrmecocystus@gmail.com",
    -          file = "conditionality_data_prelim_eml.xml")
    -
    ## [1] "conditionality_data_prelim_eml.xml"
    -
    -

    Validate the EML file

    -
    eml_validate("conditionality_data_prelim_eml.xml")
    -
    ## EML specific tests XML specific tests 
    -##               TRUE               TRUE
    -
    -

    Read data and metadata

    -
    gg <- eml_read("conditionality_data_prelim_eml.xml")
    -eml_get(gg, "contact")
    -
    ## [1] "myrmecocystus@gmail.com"
    -
    eml_get(gg, "citation_info")
    -
    ## Chamberlain S (2014-10-06). _metadata_.
    -
    dat <- eml_get(gg, "data.frame")
    -head(dat[,c(1:10)])
    -
    ##   order i indrep avg author_last  finit_1 finit_2 finit_abv co_author
    -## 1     1 1      a   1      Devall margaret       s        ms     Thein
    -## 2     2 1      a   2      Devall margaret       s        ms     Thein
    -## 3     3 1      a   3      Devall margaret       s        ms     Thein
    -## 4     4 1      a   4      Devall margaret       s        ms     Thein
    -## 5     5 1      a   5      Devall margaret       s        ms     Thein
    -## 6     6 1      a   6      Devall margaret       s        ms     Thein
    -##   sinit_1
    -## 1 leonard
    -## 2 leonard
    -## 3 leonard
    -## 4 leonard
    -## 5 leonard
    -## 6 leonard
    -
    -

    Publish

    - -

    We can also use the EML package to publish the data, here to Figshare.

    - -

    First, install rfigshare

    -
    install.packages("rfigshare")
    -library('rfigshare')
    -
    -

    Then publish using eml_publish()

    -
    figid <- eml_publish(
    -            file = "conditionality_data_prelim_eml.xml",
    -            description = "EML file for Chamberlain, S.A., J.A. Rudgers, and J.L. Bronstein. 2014. How context-dependent are species interactions. Ecology Letters",
    -            categories = "Ecology",
    -            tags = "EML",
    -            destination = "figshare",
    -            visibility = "public",
    -            title = "condionality data, EML")
    -fs_make_public(figid)
    -
    -

    + + + I've reworked `sofa` recently after someone reported a bug in the package. Since the last post on this package on 2013-06-21, there's a bunch of changes: + +* Removed the `sofa_` prefix from all functions as it wasn't really necessary. +* Replaced `rjson`/`RJSONIO` with `jsonlite` for JSON I/O. +* New functions: + * `revisions()` - to get the revision numbers for a document. + * `uuids()` - get any number of UUIDs - e.g., if you want to set document IDs with UUIDs +* Most functions that deal with documents are prefixed with `doc_` +* Functions that deal with databases are prefixed with `db_` +* Simplified all code, reducing duplication +* All functions take `cushion` as the first parameter, for consistency sake. +* Changed `cushion()` function so that you can only register one cushion with each function call, +and the function takes parameters for each element now, `name` (name of the cushion, whatever you want), `user` (user name, if applicable), `pwd` (password, if applicable), `type` (one of localhost, cloudant, or iriscouch), and `port` (if applicable). +* Changed package license from `CC0` to `MIT` + +There's still more to do, but I'm pretty happy with the recent changes, and I hope at least some find the package useful. Also, would love people to try it out as all bugs are shallow and all that... + +The following are a few examples of package use. + +## Install CouchDB + +Instructions [here](http://wiki.apache.org/couchdb/Installation) + +## Start CouchDB + +In your terminal + +```sh +couchdb +``` + +You can interact with your CouchDB databases as well in your browser. Navigate to [http://localhost:5984/_utils](http://localhost:5984/_utils) + +## Install sofa + + +```r +install.packages("devtools") +devtools::install_github("sckott/sofa") +``` + + +```r +library('sofa') +``` + +## Authenticate - Cushions + +As an example, here's how I set up details for connecting to my Cloudant couch: + + +```r +cushion(name = 'cloudant', user = '', pwd = '', type = "cloudant") +``` + +By default there is a built-in `cushion` for localhost so you don't have to do that, unless you want to change those details, e.g., the port number. Right now cushions aren't preserved across R sessions, but working on that. + +For example, I'll lay down a cushion for Cloudant, then I can call `cushions()` to see my cushions: + + +```r +cushion(name = 'cloudant', user = '', pwd = '', type = "cloudant") +cushions() +``` + +By default, if you don't provide a cushion name, you are using localhost. + +## Ping the server + + +```r +ping() +#> $couchdb +#> [1] "Welcome" +#> +#> $uuid +#> [1] "2c10f0c6d9bd17205b692ae93cd4cf1d" +#> +#> $version +#> [1] "1.6.0" +#> +#> $vendor +#> $vendor$version +#> [1] "1.6.0-1" +#> +#> $vendor$name +#> [1] "Homebrew" +``` + +Nice, it's working. 
+ +## Create a new database, and list available databases + + + + +```r +db_create(dbname='sofadb') +#> $ok +#> [1] TRUE +``` + +see if its there now + + +```r +db_list() +#> [1] "_replicator" "_users" "alm_couchdb" "cachecall" "hello_earth" +#> [6] "leothelion" "mran" "mydb" "newdbs" "sofadb" +``` + +## Create documents + +Create a document WITH a name (uses PUT) + + +```r +doc1 <- '{"name":"sofa","beer":"IPA"}' +doc_create(dbname="sofadb", doc=doc1, docid="a_beer") +#> $ok +#> [1] TRUE +#> +#> $id +#> [1] "a_beer" +#> +#> $rev +#> [1] "1-a48c98c945bcc05d482bc6f938c89882" +``` + +Create a document WITHOUT a name (uses POST) + + +```r +doc2 <- '{"name":"sofa","icecream":"rocky road"}' +doc_create(dbname="sofadb", doc=doc2) +#> $ok +#> [1] TRUE +#> +#> $id +#> [1] "c5c5c332c25cf62cc584647a81006f6d" +#> +#> $rev +#> [1] "1-fd0da7fcb8d3afbfc5757d065c92362c" +``` + +## List documents + +List them + + +```r +alldocs(dbname="sofadb") +#> id key +#> 1 a_beer a_beer +#> 2 c5c5c332c25cf62cc584647a81006f6d c5c5c332c25cf62cc584647a81006f6d +#> rev +#> 1 1-a48c98c945bcc05d482bc6f938c89882 +#> 2 1-fd0da7fcb8d3afbfc5757d065c92362c +``` + +Optionally include the documents, returned as a list by default as it would be hard to parse an endless number of document formats. + + +```r +alldocs(dbname="sofadb", include_docs = TRUE) +#> $total_rows +#> [1] 2 +#> +#> $offset +#> [1] 0 +#> +#> $rows +#> $rows[[1]] +#> $rows[[1]]$id +#> [1] "a_beer" +#> +#> $rows[[1]]$key +#> [1] "a_beer" +#> +#> $rows[[1]]$value +#> $rows[[1]]$value$rev +#> [1] "1-a48c98c945bcc05d482bc6f938c89882" +#> +#> +#> $rows[[1]]$doc +#> $rows[[1]]$doc$`_id` +#> [1] "a_beer" +#> +#> $rows[[1]]$doc$`_rev` +#> [1] "1-a48c98c945bcc05d482bc6f938c89882" +#> +#> $rows[[1]]$doc$name +#> [1] "sofa" +#> +#> $rows[[1]]$doc$beer +#> [1] "IPA" +#> +#> +#> +#> $rows[[2]] +#> $rows[[2]]$id +#> [1] "c5c5c332c25cf62cc584647a81006f6d" +#> +#> $rows[[2]]$key +#> [1] "c5c5c332c25cf62cc584647a81006f6d" +#> +#> $rows[[2]]$value +#> $rows[[2]]$value$rev +#> [1] "1-fd0da7fcb8d3afbfc5757d065c92362c" +#> +#> +#> $rows[[2]]$doc +#> $rows[[2]]$doc$`_id` +#> [1] "c5c5c332c25cf62cc584647a81006f6d" +#> +#> $rows[[2]]$doc$`_rev` +#> [1] "1-fd0da7fcb8d3afbfc5757d065c92362c" +#> +#> $rows[[2]]$doc$name +#> [1] "sofa" +#> +#> $rows[[2]]$doc$icecream +#> [1] "rocky road" +``` + +## Update a document + +Change _IPA_ (india pale ale) to _IPL_ (india pale lager). We need to get revisions first as we need to include revision number when we update a document. 
+ + +```r +(revs <- revisions(dbname = "sofadb", docid = "a_beer")) +#> [1] "1-a48c98c945bcc05d482bc6f938c89882" +``` + + +```r +newdoc <- '{"name":"sofa","beer":"IPL"}' +doc_update(dbname = "sofadb", doc = newdoc, docid = "a_beer", rev = revs[1]) +#> $ok +#> [1] TRUE +#> +#> $id +#> [1] "a_beer" +#> +#> $rev +#> [1] "2-f2390eb18b8f9a870c915c6712a7f65e" +``` + +Should be two revisions now + + +```r +revisions(dbname = "sofadb", docid = "a_beer") +#> [1] "2-f2390eb18b8f9a870c915c6712a7f65e" +#> [2] "1-a48c98c945bcc05d482bc6f938c89882" +``` + +## Get headers for a document + + +```r +doc_head(dbname = "sofadb", docid = "a_beer") +#> [[1]] +#> [[1]]$status +#> [1] 200 +#> +#> [[1]]$version +#> [1] "HTTP/1.1" +#> +#> [[1]]$headers +#> $server +#> [1] "CouchDB/1.6.0 (Erlang OTP/17)" +#> +#> $etag +#> [1] "\"2-f2390eb18b8f9a870c915c6712a7f65e\"" +#> +#> $date +#> [1] "Tue, 18 Nov 2014 21:19:16 GMT" +#> +#> $`content-type` +#> [1] "application/json" +#> +#> $`content-length` +#> [1] "88" +#> +#> $`cache-control` +#> [1] "must-revalidate" +#> +#> attr(,"class") +#> [1] "insensitive" "list" +``` + +## JSON vs. list + +Across all/most functions you can request json or list as output with the `as` parameter. + + +```r +db_list(as = "list") +#> [1] "_replicator" "_users" "alm_couchdb" "cachecall" "hello_earth" +#> [6] "leothelion" "mran" "mydb" "newdbs" "sofadb" +``` + + +```r +db_list(as = "json") +#> [1] "[\"_replicator\",\"_users\",\"alm_couchdb\",\"cachecall\",\"hello_earth\",\"leothelion\",\"mran\",\"mydb\",\"newdbs\",\"sofadb\"]\n" +``` + +## Curl options + +Across all functions you can pass in curl options. We're using `httr` internally, so you can use `httr` helper functions to make some curl options easier. Examples: + +Verbose output + + +```r +library("httr") +db_list(config=verbose()) +#> [1] "_replicator" "_users" "alm_couchdb" "cachecall" "hello_earth" +#> [6] "leothelion" "mran" "mydb" "newdbs" "sofadb" +``` + +Progress + + +```r +db_list(config=progress()) +#> + | + | | 0% + | + |=================================================================| 100% +#> [1] "_replicator" "_users" "alm_couchdb" "cachecall" "hello_earth" +#> [6] "leothelion" "mran" "mydb" "newdbs" "sofadb" +``` + +Set a timeout + + +```r +db_list(config=timeout(seconds = 0.001)) +#> +#> Error in function (type, msg, asError = TRUE) : +#> Operation timed out after 3 milliseconds with 0 out of -1 bytes received +``` + + +## Full text search + +I'm working on an R client for Elaticsearch called `elastic` - find it at [https://github.com/ropensci/elastic](https://github.com/ropensci/elastic) + +Thinking about where to include functions to allow `elastic` and `sofa` to work together...if you have any thoughts hit up the issues. I'll probably include helper functions for CouchDB search in the `elastic` package, interfacing with the [CouchDB plugin for Elasticsearch](https://github.com/elasticsearch/elasticsearch-river-couchdb).
    diff --git a/_site/page14/index.html b/_site/page14/index.html index 9e5bb71f0d..61e5f488a1 100644 --- a/_site/page14/index.html +++ b/_site/page14/index.html @@ -59,6 +59,204 @@

    Recology

      +
    +

    + + Conditionality meta-analysis data + +

    + + + + ## The paper + +One paper from my graduate work asked most generally ~ "How much does the variation in magnitudes and signs of species interaction outcomes vary?". More specifically, we wanted to know if variation differed among species interaction classes (mutualism, competition, predation), and among various "gradients" (space, time, etc.). To answer this question, we used a meta-analysis approach (rather than e.g., a field experiment). We [published the paper][ecolett] recently. + +> p.s. I really really wish we would have put it in an open access journal... + +## The data + +Anyway, I'm here to talk about the __data__. We didn't get the data up with the paper, but it is [up on Figshare][fig] now. The files there are the following: + +* `coniditionality.R` - script used to process the data from `variables_prelim.csv` +* `variables_prelim.csv` - description of variables in the preliminary data set, matches `conditionality_data_prelim.csv` +* `variables_used.csv` - description of variables in the used data set, matches `conditionality_data_used.csv` +* `conditionality_data_prelim.csv` - preliminary data, the raw data +* `conditionality_data_used.csv` - the data used for our paper +* `README.md` - the readme +* `paper_selection.csv` - the list of papers we went through, with remarks about paper selection + +Please do play with the data, publish some papers, etc, etc. It took 6 of us about 4 years to collect this data; we skimmed through ~11,000 papers on the first pass (aka. skimming through abstracts in Google Scholar and Web of Science), then decided on nearly 500 papers to get data from, and narrowed down to 247 papers for the publication mentioned above. Now, there was no funding for this, so it was sort of done in between other projects, but still, it was simply __A LOT__ of tables to digitize, and graphs to extract data points from. __Anyway__, hopefully you will find this data useful :p + +## EML + +I think this dataset would be a great introduction to the potential power of EML ([Ecological Metadata Langauge][eml]). At [rOpenSci](http://ropensci.org/), one of our team [Carl Boettiger][carl], along with Claas-Thido Pfaff, Duncan Temple Lang, Karthik Ram, and Matt Jones, have created an R client for EML, to parse EML files and to create and publish them. + +## What is EML?/Why EML? + +A demonstration is in order... + +## Example using EML with this dataset + +### Install EML + + +```r +library("devtools") +install.packages("RHTMLForms", repos = "http://www.omegahat.org/R/", type="source") +install_github("ropensci/EML", build=FALSE, dependencies=c("DEPENDS", "IMPORTS")) +``` + +Load `EML` + + +```r +library('EML') +``` + +### Prepare metadata + + +```r +# dataset +prelim_dat <- read.csv("conditionality_data_prelim.csv") +# variable descriptions for each column +prelim_vars <- read.csv("variables_prelim.csv", stringsAsFactors = FALSE) +``` + +Get column definitions in a vector + + +```r +col_defs <- prelim_vars$description +``` + +Create unit definitions for each column + + +```r +unit_defs <- list( + c(unit = "number", + bounds = c(0, Inf)), + c(unit = "number", + bounds = c(0, Inf)), + "independent replicates", + c(unit = "number", + bounds = c(0, Inf)), + + ... 
+) +``` + + + + +### Write an EML file + + +```r +eml_write(prelim_dat, + unit.defs = unit_defs, + col.defs = col_defs, + creator = "Scott Chamberlain", + contact = "myrmecocystus@gmail.com", + file = "conditionality_data_prelim_eml.xml") +``` + +``` +## [1] "conditionality_data_prelim_eml.xml" +``` + +### Validate the EML file + + +```r +eml_validate("conditionality_data_prelim_eml.xml") +``` + +``` +## EML specific tests XML specific tests +## TRUE TRUE +``` + +### Read data and metadata + + +```r +gg <- eml_read("conditionality_data_prelim_eml.xml") +eml_get(gg, "contact") +``` + +``` +## [1] "myrmecocystus@gmail.com" +``` + +```r +eml_get(gg, "citation_info") +``` + +``` +## Chamberlain S (2014-10-06). _metadata_. +``` + +```r +dat <- eml_get(gg, "data.frame") +head(dat[,c(1:10)]) +``` + +``` +## order i indrep avg author_last finit_1 finit_2 finit_abv co_author +## 1 1 1 a 1 Devall margaret s ms Thein +## 2 2 1 a 2 Devall margaret s ms Thein +## 3 3 1 a 3 Devall margaret s ms Thein +## 4 4 1 a 4 Devall margaret s ms Thein +## 5 5 1 a 5 Devall margaret s ms Thein +## 6 6 1 a 6 Devall margaret s ms Thein +## sinit_1 +## 1 leonard +## 2 leonard +## 3 leonard +## 4 leonard +## 5 leonard +## 6 leonard +``` + +### Publish + +We can also use the `EML` package to publish the data, here to [Figshare](http://figshare.com). + +First, install `rfigshare` + + +```r +install.packages("rfigshare") +library('rfigshare') +``` + +Then publish using `eml_publish()` + + +```r +figid <- eml_publish( + file = "conditionality_data_prelim_eml.xml", + description = "EML file for Chamberlain, S.A., J.A. Rudgers, and J.L. Bronstein. 2014. How context-dependent are species interactions. Ecology Letters", + categories = "Ecology", + tags = "EML", + destination = "figshare", + visibility = "public", + title = "condionality data, EML") +fs_make_public(figid) +``` + +![](/public/img/2014-10-06-conditionality-meta-analysis/figshare_conditional.png) + +[ecolett]: http://scottchamberlain.info/publications/ +[fig]: http://figshare.com/articles/Conditionality_data/1097657 +[eml]: https://knb.ecoinformatics.org/#external//emlparser/docs/index.html +[carl]: http://www.carlboettiger.info/ + +
    +

    @@ -68,46 +266,65 @@

    -

    My last blog post on this package was so long ago the package wrapped both New York Times APIs and Sunlight Labs APIs and the package was called govdat. I split that package up into rsunlight for Sunlight Labs APIs and rtimes for some New York Times APIs. rtimes is in development at Github.

    + My [last blog post on this package](http://recology.info/2014/05/rsunlight/) was so long ago the package wrapped both New York Times APIs and Sunlight Labs APIs and the package was called `govdat`. I split that package up into `rsunlight` for Sunlight Labs APIs and `rtimes` for some New York Times APIs. `rtimes` is [in development at Github](https://github.com/ropengov/rtimes). + +We've updated the package to include four sets of functions, one set for each of four Sunlight Labs APIs (with a separate prefix for each API): + +* Congress API (`cg_`) +* Open States API (`os_`) +* Capitol Words API (`cw_`) +* Influence Explorer API (`ie_`) + +Then there are many methods for each API. + +## rsunlight intro + +### Installation -

    We've updated the package to include four sets of functions, one set for each of four Sunlight Labs APIs (with a separate prefix for each API):

    +First, installation -
      -
    • Congress API (cg_)
    • -
    • Open States API (os_)
    • -
    • Capitol Words API (cw_)
    • -
    • Influence Explorer API (ie_)
    • -
    -

    Then there are many methods for each API.

    +```r +devtools::install_github("ropengov/rsunlight") +``` -

    rsunlight intro

    +Load the library -

    Installation

    -

    First, installation

    -
    devtools::install_github("ropengov/rsunlight")
    -
    -

    Load the library

    -
    library("rsunlight")
    -
    -

    Congress API

    +```r +library("rsunlight") +``` -

    Search for Fed level bills that include the term health care in them.

    -
    res <- cg_bills(query='health care')
    -head(res$results[,1:4])
    -
    ##          nicknames congress last_version_on sponsor_id
    +### Congress API
    +
    +Search for Fed level bills that include the term _health care_ in them.
    +
    +
    +```r
    +res <- cg_bills(query='health care')
    +head(res$results[,1:4])
    +```
    +
    +```
    +##          nicknames congress last_version_on sponsor_id
     ## 1        obamacare      111      2010-08-25    S000749
     ## 2 obamacare, ppaca      111      2010-08-25    R000053
     ## 3             NULL      113      2013-10-09    K000220
     ## 4             NULL      111      2009-01-06    I000056
     ## 5             NULL      112      2011-01-05    I000056
     ## 6             NULL      111      2009-05-05    D000197
    -
    -

    Search for bills that have the two terms transparency and accountability within 5 words of each other in the bill.

    -
    res <- cg_bills(query='transparency accountability'~5)
    -head(res$results[,1:4])
    -
    ##   congress last_version_on sponsor_id
    +```
    +
    +Search for bills that have the two terms _transparency_ and _accountability_ within 5 words of each other in the bill.
    +
    +
    +```r
+res <- cg_bills(query='"transparency accountability"~5')
    +head(res$results[,1:4])
    +```
    +
    +```
    +##   congress last_version_on sponsor_id
     ## 1      111      2009-01-15    R000435
     ## 2      113      2013-07-17    R000595
     ## 3      112      2011-12-08    R000435
    @@ -135,13 +352,20 @@ 

    Congress API

    ## 4 http://beta.congress.gov/bill/113th/house-bill/3155 ## 5 http://beta.congress.gov/bill/112th/senate-bill/1848 ## 6 http://beta.congress.gov/bill/113th/senate-bill/1347 -
    -

    Open States API

    +``` + +### Open States API + +Search State Bills, in this case search for the term _agriculture_ in Texas. + + +```r +res <- os_billsearch(terms = 'agriculture', state = 'tx') +head(res) +``` -

    Search State Bills, in this case search for the term agriculture in Texas.

    -
    res <- os_billsearch(terms = 'agriculture', state = 'tx')
    -head(res)
    -
    ##                                                                                                                                                 title
    +```
    +##                                                                                                                                                 title
     ## 1 Relating to authorizing the issuance of revenue bonds to fund capital projects at public institutions of higher education; making an appropriation.
     ## 2                          Relating to authorizing the issuance of revenue bonds to fund capital projects at public institutions of higher education.
     ## 3                          Relating to authorizing the issuance of revenue bonds to fund capital projects at public institutions of higher education.
    @@ -176,25 +400,39 @@ 

    Open States API

    ## 4 SB 6 ## 5 SB 44 ## 6 SB 1010 -
    -

    Search for legislators in California (ca) and in the democratic party

    -
    res <- os_legislatorsearch(state = 'ca', party = 'democratic', fields = c('full_name','+capitol_office.phone'))
    -head(res)
    -
    ##            phone        id       full_name
    +```
    +
    +Search for legislators in California (_ca_) and in the democratic party
    +
    +
    +```r
    +res <- os_legislatorsearch(state = 'ca', party = 'democratic', fields = c('full_name','+capitol_office.phone'))
    +head(res)
    +```
    +
    +```
    +##            phone        id       full_name
     ## 1 (916) 319-2014 CAL000058   Nancy Skinner
     ## 2 (916) 319-2015 CAL000059   Joan Buchanan
     ## 3 (916) 319-2022 CAL000084       Paul Fong
     ## 4 (916) 319-2046 CAL000089      John Pérez
     ## 5 (916) 319-2080 CAL000098 V. Manuel Pérez
     ## 6 (916) 319-2001 CAL000101  Wesley Chesbro
    -
    -

    Now you can call each representative, yay!

    +``` + +Now you can call each representative, yay! + +### Capitol Words API -

    Capitol Words API

    +Search for phrase _climate change_ used by politicians between September 5th and 16th, 2011: -

    Search for phrase climate change used by politicians between September 5th and 16th, 2011:

    -
    head(cw_text(phrase='climate change', start_date='2011-09-05', end_date='2011-09-16', party='D')[,c('speaker_last','origin_url')])
    -
    ##   speaker_last
    +
    +```r
    +head(cw_text(phrase='climate change', start_date='2011-09-05', end_date='2011-09-16', party='D')[,c('speaker_last','origin_url')])
    +```
    +
    +```
    +##   speaker_last
     ## 1      Tsongas
     ## 2       Inslee
     ## 3        Costa
    @@ -208,26 +446,37 @@ 

    Capitol Words API

    ## 4 http://origin.www.gpo.gov/fdsys/pkg/CREC-2011-09-15/html/CREC-2011-09-15-pt1-PgS5650.htm ## 5 http://origin.www.gpo.gov/fdsys/pkg/CREC-2011-09-13/html/CREC-2011-09-13-pt1-PgS5510.htm ## 6 http://origin.www.gpo.gov/fdsys/pkg/CREC-2011-09-13/html/CREC-2011-09-13-pt1-PgS5513-2.htm -
    -

    Plot mentions of the term climate change over time for Democrats vs. Republicans

    -
    library('ggplot2')
    -dat_d <- cw_timeseries(phrase='climate change', party="D")
    -dat_d$party <- rep("D", nrow(dat_d))
    -dat_r <- cw_timeseries(phrase='climate change', party="R")
    -dat_r$party <- rep("R", nrow(dat_r))
    -dat_both <- rbind(dat_d, dat_r)
    -ggplot(dat_both, aes(day, count, colour=party)) +
    -   geom_line() +
    -   theme_grey(base_size=20) +
    -   scale_colour_manual(values=c("blue","red"))
    -
    -

    plot of chunk unnamed-chunk-9

    - -

    Influence Explorer API

    - -

    Search for contributions of equal to or more than $20,000,000.

    -
    ie_contr(amount='>|20000000')[,c('amount','recipient_name','contributor_name')]
    -
    ##         amount
    +```
    +
    +Plot mentions of the term _climate change_ over time for Democrats vs. Republicans
    +
    +
    +```r
    +library('ggplot2')
    +dat_d <- cw_timeseries(phrase='climate change', party="D")
    +dat_d$party <- rep("D", nrow(dat_d))
    +dat_r <- cw_timeseries(phrase='climate change', party="R")
    +dat_r$party <- rep("R", nrow(dat_r))
    +dat_both <- rbind(dat_d, dat_r)
    +ggplot(dat_both, aes(day, count, colour=party)) +
    +   geom_line() +
    +   theme_grey(base_size=20) +
    +   scale_colour_manual(values=c("blue","red"))
    +```
    +
    +![plot of chunk unnamed-chunk-9](/public/img/2014-08-11-rsunlight/unnamed-chunk-9.png)
    +
    +### Influence Explorer API
    +
+Search for contributions equal to or more than `$20,000,000`.
    +
    +
    +```r
    +ie_contr(amount='>|20000000')[,c('amount','recipient_name','contributor_name')]
    +```
    +
    +```
    +##         amount
     ## 1  25177212.00
     ## 2  20000000.00
     ## 3  20000000.00
    @@ -260,10 +509,17 @@ 

    Influence Explorer API

    ## 8 GOLISANO, B THOMAS ## 9 GOLISANO, B THOMAS ## 10 GOLISANO, B THOMAS -
    -

    Top industries, by contributions given. UNKOWN is a very influential industry. Of course law firms are high up there, as well as real estate. I'm sure oil and gas is embarrased that they're contributing less than pulic sector unions.

    -
    (res <- ie_industries(method='top_ind', limit=10))
    -
    ##       count        amount                               id
    +```
    +
+Top industries, by contributions given. _UNKNOWN_ is a very influential industry. Of course _law firms_ are high up there, as well as _real estate_. I'm sure _oil and gas_ is embarrassed that they're contributing less than _public sector unions_.
    +
    +
    +```r
    +(res <- ie_industries(method='top_ind', limit=10))
    +```
    +
    +```
    +##       count        amount                               id
     ## 1  14919818 3825359507.21 cdb3f500a3f74179bb4a5eb8b2932fa6
     ## 2   3600761 2787678962.95 f50cf984a2e3477c8167d32e2b14e052
     ## 3    329906 1717649914.58 9cac88377c3b400e89c2d6762e3f28f6
    @@ -279,35 +535,37 @@ 

    Influence Explorer API

    ## 2 TRUE LAWYERS/LAW FIRMS ## 3 TRUE CANDIDATE SELF-FINANCE ## 4 TRUE REAL ESTATE -## 5 TRUE SECURITIES & INVESTMENT +## 5 TRUE SECURITIES & INVESTMENT ## 6 TRUE PUBLIC SECTOR UNIONS ## 7 TRUE HEALTH PROFESSIONALS ## 8 TRUE INSURANCE -## 9 TRUE OIL & GAS +## 9 TRUE OIL & GAS ## 10 TRUE CASINOS/GAMBLING -
    res$amount <- as.numeric(res$amount)
    -ggplot(res, aes(reorder(name, amount), amount)) +
    -  geom_bar(stat = "identity") +
    -  coord_flip() +
    -  scale_y_continuous(labels=dollar) +
    -  theme_grey(base_size = 14)
    -
    -

    plot of chunk unnamed-chunk-11

    +``` + +```r +res$amount <- as.numeric(res$amount) +ggplot(res, aes(reorder(name, amount), amount)) + + geom_bar(stat = "identity") + + coord_flip() + + scale_y_continuous(labels=dollar) + + theme_grey(base_size = 14) +``` + +![plot of chunk unnamed-chunk-11](/public/img/2014-08-11-rsunlight/unnamed-chunk-11.png) -
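+
+One small assumption worth flagging: `dollar` in the `scale_y_continuous(labels=dollar)` call above comes from the `scales` package, which isn't attached anywhere in this post, so load it first if you're following along:
+
+
+```r
+library("scales")  # provides dollar() for the axis labels
+```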
    +------- -

    Feedback

    +## Feedback -

    Please do use rsunlight, and let us know what you want fixed, new features, etc.

    +Please do use `rsunlight`, and let us know what you want fixed, new features, etc. -

    Still to come:

    +## Still to come: -
      -
    • Functions to visualize data from each API. You can do this yourself, but a few functions will be created to help those that are new to R.
    • -
    • Vectorize functions so that you can give many inputs to a function instead of a single input.
    • -
    • test suite: embarrasingly, there is no test suite yet, boo me.
    • -
    • I plan to push rsunlight to CRAN soon as v0.3
    • -
+* Functions to visualize data from each API. You can do this yourself, but a few functions will be created to help those that are new to R.
+* Vectorize functions so that you can give many inputs to a function instead of a single input (a stopgap `lapply()` sketch follows this list).
+* Test suite: embarrassingly, there is no test suite yet, boo me.
+* I plan to push `rsunlight` to CRAN soon as `v0.3`
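+
+Until that vectorization lands, here's a minimal stopgap sketch (my addition, not package functionality): loop over several inputs yourself with `lapply()`, reusing `cg_bills()` exactly as shown above.
+
+
+```r
+# run the same Congress API bill search for several query terms
+queries <- c('health care', 'climate change')
+res_list <- lapply(queries, function(x) cg_bills(query = x))
+```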
    @@ -320,39 +578,67 @@

    -

    My last blog post introduced the R package I'm working on analogsea, an R client for the Digital Ocean API.

+ My [last blog post](http://recology.info/2014/05/analogsea/) introduced the R package I'm working on, `analogsea`, an R client for the Digital Ocean API.
+
+Things have changed a bit, including filling out more functions for all API endpoints, and incorporating feedback from Hadley and Karthik. The package is at `v0.1` now, so I thought I'd say a few things about how it works.
+
+Note that Digital Ocean's v2 API is in beta stage now, so the current version of `analogsea` at `v0.1` works with their v1 API. The [v2 branch of analogsea](https://github.com/sckott/analogsea/tree/v2) is being developed for their v2 API.
+
+If you sign up for an account with Digital Ocean use this referral link: [https://www.digitalocean.com/?refcode=0740f5169634](https://www.digitalocean.com/?refcode=0740f5169634) so I can earn some credits. thx :)
+
+First, installation
+
+Note: I did try to submit to CRAN, but Ripley complained about the package name so I'd rather not waste my time esp since people using this likely will already know about `install_github()`.
+
+
+```r
+devtools::install_github("sckott/analogsea")
+```
+
+Load the library
+
+
+```r
+library("analogsea")
+```
+
+```
+## Loading required package: magrittr
+```
+
+Authentication has changed a bit. Whereas auth details were stored as environment variables before, I'm just using R's options. `do_auth()` will ask for your Digital Ocean details. You can enter them each R session, or store them in your `.Rprofile` file. After successful authentication, each function simply looks for your auth details with `getOption()`. You don't have to use this function first, though if you don't your first call to another function will ask for auth details.

    Things have changed a bit, including fillig out more functions for all API endpoints, and incorparting feedback from Hadley and Karthik. The package is as v0.1 now, so I thought I'd say a few things about how it works.

    -

    Note that Digital Ocean's v2 API is in beta stage now, so the current version of analogsea at v0.1 works with their v1 API. The v2 branch of analogsea is being developed for their v2 API.

    +```r +do_auth() +``` -
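+
+If you go the `.Rprofile` route, it would look something like the sketch below. The option names here are placeholders I made up for illustration; check what `do_auth()` actually sets and what the functions look up via `getOption()` before copying this.
+
+
+```r
+# in ~/.Rprofile -- placeholder option names, not verified against the package
+options(do_client_id = "your-client-id", do_api_key = "your-api-key")
+```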

    If you sign up for an account with Digital Ocean use this referral link: https://www.digitalocean.com/?refcode=0740f5169634 so I can earn some credits. thx :)

    +`sizes`, `images`, and `keys` functions have changed a bit, by default outputting a `data.frame` now. -

    First, installation

    +List available regions -

    Note: I did try to submit to CRAN, but Ripley complained about the package name so I'd rather not waste my time esp since people using this likely will already know about install_github().

    -
    devtools::install_github("sckott/analogsea")
    -
    -

    Load the library

    -
    library("analogsea")
    -
    ## Loading required package: magrittr
    -
    -

    Authenticate has changed a bit. Whereas auth details were stored as environment variables before, I'm just using R's options. do_auth() will ask for your Digital Ocean details. You can enter them each R session, or store them in your .Rprofile file. After successful authentication, each function simply looks for your auth details with getOption(). You don't have to use this function first, though if you don't your first call to another function will ask for auth details.

    -
    do_auth()
    -
    -

    sizes, images, and keys functions have changed a bit, by default outputting a data.frame now.

    -

    List available regions

    -
    regions()
    -
    ##   id            name slug
    +```r
    +regions()
    +```
    +
    +```
    +##   id            name slug
     ## 1  3 San Francisco 1 sfo1
     ## 2  4      New York 2 nyc2
     ## 3  5     Amsterdam 2 ams2
     ## 4  6     Singapore 1 sgp1
    -
    -

    List available sizes

    -
    sizes()
    -
    ##   id  name  slug memory cpu disk cost_per_hour cost_per_month
    +```
    +
    +List available sizes
    +
    +
    +```r
    +sizes()
    +```
    +
    +```
    +##   id  name  slug memory cpu disk cost_per_hour cost_per_month
     ## 1 66 512MB 512mb    512   1   20       0.00744            5.0
     ## 2 63   1GB   1gb   1024   1   30       0.01488           10.0
     ## 3 62   2GB   2gb   2048   2   40       0.02976           20.0
    @@ -362,11 +648,18 @@ 

    ## 7 60 32GB 32gb 32768 12 320 0.47619 320.0 ## 8 70 48GB 48gb 49152 16 480 0.71429 480.0 ## 9 69 64GB 64gb 65536 20 640 0.95238 640.0 -

    -

    List available images

    -
    head(images())
    -
    ##        id                  name             slug distribution public sfo1
    -## 1 3209452 rstudioserverssh_snap             <NA>       Ubuntu  FALSE    1
    +```
    +
    +List available images
    +
    +
    +```r
    +head(images())
    +```
    +
    +```
    +##        id                  name             slug distribution public sfo1
    +## 1 3209452 rstudioserverssh_snap                    Ubuntu  FALSE    1
     ## 2    1601        CentOS 5.8 x64   centos-5-8-x64       CentOS   TRUE    1
     ## 3    1602        CentOS 5.8 x32   centos-5-8-x32       CentOS   TRUE    1
     ## 4   12573        Debian 6.0 x64   debian-6-0-x64       Debian   TRUE    1
    @@ -379,42 +672,63 @@ 

    ## 4 1 1 1 1 1 ## 5 1 1 1 1 1 ## 6 1 1 1 1 1 -

    -

    List ssh keys

    -
    keys()
    -
    ## $ssh_keys
    +```
    +
    +List ssh keys
    +
    +
    +```r
    +keys()
    +```
    +
    +```
    +## $ssh_keys
     ## $ssh_keys[[1]]
     ## $ssh_keys[[1]]$id
     ## [1] 89103
     ##
     ## $ssh_keys[[1]]$name
    -## [1] "Scott Chamberlain"
    -
    -

    One change that's of interest is that most of the various droplets_*() functions take in the outputs of other droplets_*() functions. This means that we can pipe outputs of one droplets_*() function to another, including non-droplet_* functions (see examples).

    +## [1] "Scott Chamberlain" +``` + +One change that's of interest is that most of the various `droplets_*()` functions take in the outputs of other `droplets_*()` functions. This means that we can pipe outputs of one `droplets_*()` function to another, including non-`droplet_*` functions (see examples). + +Let's create a droplet: + + +```r +(res <- droplets_new(name="foo", size_slug = '512mb', image_slug = 'ubuntu-14-04-x64', region_slug = 'sfo1', ssh_key_ids = 89103)) +``` + +```r +$droplet +$droplet$id +[1] 1880805 -

    Let's create a droplet:

    -
    (res <- droplets_new(name="foo", size_slug = '512mb', image_slug = 'ubuntu-14-04-x64', region_slug = 'sfo1', ssh_key_ids = 89103))
    -
    $droplet
    -$droplet$id
    -[1] 1880805
    +$droplet$name
    +[1] "foo"
     
    -$droplet$name
    -[1] "foo"
    +$droplet$image_id
    +[1] 3240036
     
    -$droplet$image_id
    -[1] 3240036
    +$droplet$size_id
    +[1] 66
     
    -$droplet$size_id
    -[1] 66
    +$droplet$event_id
    +[1] 26711810
    +```
     
    -$droplet$event_id
    -[1] 26711810
    -
    -

    List my droplets

    +List my droplets -

    This function used to be do_droplets_get()

    -
    droplets()
    -
    ## $droplet_ids
    +This function used to be `do_droplets_get()`
    +
    +
    +```r
    +droplets()
    +```
    +
    +```
    +## $droplet_ids
     ## [1] 1880805
     ##
     ## $droplets
    @@ -423,7 +737,7 @@ 

    ## [1] 1880805 ## ## $droplets[[1]]$name -## [1] "foo" +## [1] "foo" ## ## $droplets[[1]]$image_id ## [1] 3240036 @@ -438,7 +752,7 @@

    ## [1] FALSE ## ## $droplets[[1]]$ip_address -## [1] "162.243.152.56" +## [1] "162.243.152.56" ## ## $droplets[[1]]$private_ip_address ## NULL @@ -447,27 +761,41 @@

    ## [1] FALSE ## ## $droplets[[1]]$status -## [1] "active" +## [1] "active" ## ## $droplets[[1]]$created_at -## [1] "2014-06-18T14:15:35Z" +## [1] "2014-06-18T14:15:35Z" ## ## ## ## $event_id ## NULL -

    -

    As mentioned above we can now pipe output of droplet*() functions to other droplet*() functions.

    - -

    Here, pipe output of lising droplets droplets() to the events() function

    -
    droplets() %>% events()
    -
    ## Error: No event id found
    -
    -

    In this case there were no event ids to get event data on.

    - -

    Here, we'll get details for the droplet we just created, then pipe that to droplets_power_off()

    -
    droplets(1880805) %>% droplets_power_off
    -
    ## $droplet_ids
    +```
    +
    +As mentioned above we can now pipe output of `droplet*()` functions to other `droplet*()` functions.
    +
+Here, pipe the output of listing droplets, `droplets()`, to the `events()` function
    +
    +
    +```r
    +droplets() %>% events()
    +```
    +
    +```
    +## Error: No event id found
    +```
    +
    +In this case there were no event ids to get event data on.
    +
    +Here, we'll get details for the droplet we just created, then pipe that to `droplets_power_off()`
    +
    +
    +```r
    +droplets(1880805) %>% droplets_power_off
    +```
    +
    +```
    +## $droplet_ids
     ## [1] 1880805
     ##
     ## $droplets
    @@ -479,7 +807,7 @@ 

    ## [1] 1880805 ## ## $droplets$droplets$name -## [1] "foo" +## [1] "foo" ## ## $droplets$droplets$image_id ## [1] 3240036 @@ -494,7 +822,7 @@

    ## [1] FALSE ## ## $droplets$droplets$ip_address -## [1] "162.243.152.56" +## [1] "162.243.152.56" ## ## $droplets$droplets$private_ip_address ## NULL @@ -503,10 +831,10 @@

    ## [1] FALSE ## ## $droplets$droplets$status -## [1] "active" +## [1] "active" ## ## $droplets$droplets$created_at -## [1] "2014-06-18T14:15:35Z" +## [1] "2014-06-18T14:15:35Z" ## ## $droplets$droplets$backups ## list() @@ -521,11 +849,20 @@

    ## ## $event_id ## [1] 26714109 -

    -

    Then pipe it again to droplets_power_on()

    -
    droplets(1880805) %>%
    +```
    +
    +Then pipe it again to `droplets_power_on()`
    +
    +
    +
    +
    +```r
    +droplets(1880805) %>%
       droplets_power_on
    -
    ## $droplet_ids
    +```
    +
    +```
    +## $droplet_ids
     ## [1] 1880805
     ##
     ## $droplets
    @@ -537,7 +874,7 @@ 

    ## [1] 1880805 ## ## $droplets$droplets$name -## [1] "foo" +## [1] "foo" ## ## $droplets$droplets$image_id ## [1] 3240036 @@ -552,7 +889,7 @@

    ## [1] FALSE ## ## $droplets$droplets$ip_address -## [1] "162.243.152.56" +## [1] "162.243.152.56" ## ## $droplets$droplets$private_ip_address ## NULL @@ -561,10 +898,10 @@

    ## [1] FALSE ## ## $droplets$droplets$status -## [1] "off" +## [1] "off" ## ## $droplets$droplets$created_at -## [1] "2014-06-18T14:15:35Z" +## [1] "2014-06-18T14:15:35Z" ## ## $droplets$droplets$backups ## list() @@ -579,189 +916,30 @@

    ## ## $event_id ## [1] 26714152 -

    Sys.sleep(6)
    -droplets(1880805)$droplets$status
    -
    ## [1] "off"
    -
    -

    Why not use more pipes?

    -
    droplets(1880805) %>%
    -  droplets_power_off %>%
    -  droplets_power_on %>%
    -  events
    -
    -
    +``` -

    Last time I talked about installing R, RStudio, etc. on a droplet. I'm still working out bugs in that stuff, but do test out so it can get better faster. See do_install().

    +```r +Sys.sleep(6) +droplets(1880805)$droplets$status +``` -

    - -
    -

    - - analogsea - an R client for the Digital Ocean API - -

    +``` +## [1] "off" +``` - - -

    I think this package name is my best yet. Maybe it doesn't make sense though? At least it did at the time...

    - -

    Anyway, the main motivation for this package was to be able to automate spinning up Linux boxes to do cloud R/RStudio work. Of course if you are a command line native this is all easy for you, but if you are afraid of the command line and/or just don't want to deal with it, this tool will hopefully help.

    - -

    Most of the functions in this package wrap the Digital Ocean API. So you can do things like create a new droplet, get information on your droplets, destroy droplets, get information on available images, make snapshots, etc. Basically everything you can do from their website you can do here. Note that all functions are prefixed with do_ (for Digital Ocean).

    - -

    The droplet creation part is what we can leverage to spin up a cloud machine to then install R on, and optionally RStudio server, and even RStudio Shiny server. This allows you to stay within R entirely, not having to go to ssh into the Linux machine itself or go to the Digital Ocean website (after initial setup of course).

    - -

    If you try this, I recommend using this on R on the command line as you can more easily kill the R session if something goes wrong, and quickly open a new tab/window to ssh into the Linux machine if you want.

    - -

    First, installation

    -
    devtools::install_github("sckott/analogsea")
    -
    -

    Load the library

    -
    library("analogsea")
    -
    -

    Firt, authenticate. This will ask for your Digital Ocean details. You can enter them each R session, or store them in your .Renviron file. After successful authentication, each function simply looks for your auth details with Sys.getenv().

    -
    do_auth()
    -
    -

    List available regions

    -
    sapply(do_regions()$regions, "[[", "name")
    -
    ## [1] "San Francisco 1" "New York 2"      "Amsterdam 2"     "Singapore 1"
    -
    -

    List available images

    -
    sapply(do_images()$images, "[[", "name")
    -
    ##  [1] "rstudioserverssh_snap"                          
    -##  [2] "CentOS 5.8 x64"                                 
    -##  [3] "CentOS 5.8 x32"                                 
    -##  [4] "Debian 6.0 x64"                                 
    -##  [5] "Debian 6.0 x32"                                 
    -##  [6] "Ubuntu 10.04 x64"                               
    -##  [7] "Ubuntu 10.04 x32"                               
    -##  [8] "Arch Linux 2013.05 x64"                         
    -##  [9] "Arch Linux 2013.05 x32"                         
    -## [10] "CentOS 6.4 x32"                                 
    -## [11] "CentOS 6.4 x64"                                 
    -## [12] "Ubuntu 12.04.4 x32"                             
    -## [13] "Ubuntu 12.04.4 x64"                             
    -## [14] "Ubuntu 13.10 x32"                               
    -## [15] "Ubuntu 13.10 x64"                               
    -## [16] "Fedora 19 x32"                                  
    -## [17] "Fedora 19 x64"                                  
    -## [18] "MEAN on Ubuntu 12.04.4"                         
    -## [19] "Ghost 0.4.2 on Ubuntu 12.04"                    
    -## [20] "Wordpress on Ubuntu 13.10"                      
    -## [21] "Ruby on Rails on Ubuntu 12.10 (Nginx + Unicorn)"
    -## [22] "Redmine on Ubuntu 12.04"                        
    -## [23] "Ubuntu 14.04 x32"                               
    -## [24] "Ubuntu 14.04 x64"                               
    -## [25] "Fedora 20 x32"                                  
    -## [26] "Fedora 20 x64"                                  
    -## [27] "Dokku v0.2.3 on Ubuntu 14.04"                   
    -## [28] "Debian 7.0 x64"                                 
    -## [29] "Debian 7.0 x32"                                 
    -## [30] "CentOS 6.5 x64"                                 
    -## [31] "CentOS 6.5 x32"                                 
    -## [32] "Docker 0.11.1 on Ubuntu 13.10 x64"              
    -## [33] "Django on Ubuntu 14.04"                         
    -## [34] "LAMP on Ubuntu 14.04"                           
    -## [35] "node-v0.10.28 on Ubuntu 14.04"                  
    -## [36] "GitLab 6.9.0 CE"
    -
    -

    List available sizes

    -
    do.call(rbind, do_sizes()$sizes)
    -
    ##       id name    slug    memory cpu disk cost_per_hour cost_per_month
    -##  [1,] 66 "512MB" "512mb" 512    1   20   0.00744       "5.0"         
    -##  [2,] 63 "1GB"   "1gb"   1024   1   30   0.01488       "10.0"        
    -##  [3,] 62 "2GB"   "2gb"   2048   2   40   0.02976       "20.0"        
    -##  [4,] 64 "4GB"   "4gb"   4096   2   60   0.05952       "40.0"        
    -##  [5,] 65 "8GB"   "8gb"   8192   4   80   0.1191        "80.0"        
    -##  [6,] 61 "16GB"  "16gb"  16384  8   160  0.2381        "160.0"       
    -##  [7,] 60 "32GB"  "32gb"  32768  12  320  0.4762        "320.0"       
    -##  [8,] 70 "48GB"  "48gb"  49152  16  480  0.7143        "480.0"       
    -##  [9,] 69 "64GB"  "64gb"  65536  20  640  0.9524        "640.0"
    -
    -

    Let's create a droplet:

    -
    (res <- do_droplets_new(name="foo", size_slug = '512mb', image_slug = 'ubuntu-14-04-x64', region_slug = 'sfo1', ssh_key_ids = 89103))
    -
    $status
    -[1] "OK"
    -
    -$droplet
    -$droplet$id
    -[1] 1733336
    -
    -$droplet$name
    -[1] "foo"
    -
    -$droplet$image_id
    -[1] 3240036
    -
    -$droplet$size_id
    -[1] 66
    -
    -$droplet$event_id
    -[1] 25278892
    -
    -
    -attr(,"class")
    -[1] "dodroplet"
    -
    -

    List my droplets

    -
    do_droplets_get()
    -
    ## $status
    -## [1] "OK"
    -## 
    -## $droplets
    -## $droplets[[1]]
    -## $droplets[[1]]$id
    -## [1] 1733336
    -## 
    -## $droplets[[1]]$name
    -## [1] "foo"
    -## 
    -## $droplets[[1]]$image_id
    -## [1] 3240036
    -## 
    -## $droplets[[1]]$size_id
    -## [1] 66
    -## 
    -## $droplets[[1]]$region_id
    -## [1] 3
    -## 
    -## $droplets[[1]]$backups_active
    -## [1] FALSE
    -## 
    -## $droplets[[1]]$ip_address
    -## [1] "107.170.211.252"
    -## 
    -## $droplets[[1]]$private_ip_address
    -## NULL
    -## 
    -## $droplets[[1]]$locked
    -## [1] FALSE
    -## 
    -## $droplets[[1]]$status
    -## [1] "active"
    -## 
    -## $droplets[[1]]$created_at
    -## [1] "2014-05-28T05:59:22Z"
    -
    -

    Cool, we have a new Linux box with 512 mb RAM, running Ubuntu 14.04 in the SF region. Notice that I'm using my SSH key here. If you don't use your SSH key, Digital Ocean will email you a password, which you then use. We just have to wait a bit (sometimes 20 seconds, sometimes a few minutes) for it to spin up.

    - -

    Now we can install stuff. Here, I'll install R, and RStudio Server. This step prints out the progress as you would see if you did ssh into the box itself outside of R. The RStudio Server instance will pop up in your default browser when this operation is done.

    -
    do_install(res$droplet$id, what='rstudio', usr='hey', pwd='there')
    -
    -

    - -

    You can install some things like the libcurl and libxml libraries too with the deps parameter.

    - -

    When you're done, you can destroy your droplet from R too

    -
    do_droplets_destroy(res$droplet$id)
    -
    ## $status
    -## [1] "OK"
    -## 
    -## $event_id
    -## [1] 25279124
    -
    -

    Let me know if you have any thoughts :)

    +Why not use more pipes? + + +```r +droplets(1880805) %>% + droplets_power_off %>% + droplets_power_on %>% + events +``` + +------- + +Last time I talked about installing R, RStudio, etc. on a droplet. I'm still working out bugs in that stuff, but do test out so it can get better faster. See `do_install()`.
    diff --git a/_site/page15/index.html b/_site/page15/index.html index 297c6a2bb1..755063911b 100644 --- a/_site/page15/index.html +++ b/_site/page15/index.html @@ -59,6 +59,236 @@

    Recology

      +
    +

    + + analogsea - an R client for the Digital Ocean API + +

    + + + + I think this package name is my best yet. Maybe it doesn't make sense though? At least it did at the time... + +Anyway, the main motivation for this package was to be able to automate spinning up Linux boxes to do cloud R/RStudio work. Of course if you are a command line native this is all easy for you, but if you are afraid of the command line and/or just don't want to deal with it, this tool will hopefully help. + +Most of the functions in this package wrap the Digital Ocean API. So you can do things like create a new _droplet_, get information on your droplets, _destroy_ droplets, get information on available images, make snapshots, etc. Basically everything you can do from their website you can do here. Note that all functions are prefixed with `do_` (for Digital Ocean). + +The droplet creation part is what we can leverage to spin up a cloud machine to then install R on, and optionally RStudio server, and even RStudio Shiny server. This allows you to stay within R entirely, not having to go to `ssh` into the Linux machine itself or go to the Digital Ocean website (after initial setup of course). + +If you try this, I recommend using this on R on the command line as you can more easily kill the R session if something goes wrong, and quickly open a new tab/window to `ssh` into the Linux machine if you want. + +First, installation + + +```r +devtools::install_github("sckott/analogsea") +``` + +Load the library + + +```r +library("analogsea") +``` + +Firt, authenticate. This will ask for your Digital Ocean details. You can enter them each R session, or store them in your `.Renviron` file. After successful authentication, each function simply looks for your auth details with `Sys.getenv()`. + + +```r +do_auth() +``` + +List available regions + + +```r +sapply(do_regions()$regions, "[[", "name") +``` + +``` +## [1] "San Francisco 1" "New York 2" "Amsterdam 2" "Singapore 1" +``` + +List available images + + +```r +sapply(do_images()$images, "[[", "name") +``` + +``` +## [1] "rstudioserverssh_snap" +## [2] "CentOS 5.8 x64" +## [3] "CentOS 5.8 x32" +## [4] "Debian 6.0 x64" +## [5] "Debian 6.0 x32" +## [6] "Ubuntu 10.04 x64" +## [7] "Ubuntu 10.04 x32" +## [8] "Arch Linux 2013.05 x64" +## [9] "Arch Linux 2013.05 x32" +## [10] "CentOS 6.4 x32" +## [11] "CentOS 6.4 x64" +## [12] "Ubuntu 12.04.4 x32" +## [13] "Ubuntu 12.04.4 x64" +## [14] "Ubuntu 13.10 x32" +## [15] "Ubuntu 13.10 x64" +## [16] "Fedora 19 x32" +## [17] "Fedora 19 x64" +## [18] "MEAN on Ubuntu 12.04.4" +## [19] "Ghost 0.4.2 on Ubuntu 12.04" +## [20] "Wordpress on Ubuntu 13.10" +## [21] "Ruby on Rails on Ubuntu 12.10 (Nginx + Unicorn)" +## [22] "Redmine on Ubuntu 12.04" +## [23] "Ubuntu 14.04 x32" +## [24] "Ubuntu 14.04 x64" +## [25] "Fedora 20 x32" +## [26] "Fedora 20 x64" +## [27] "Dokku v0.2.3 on Ubuntu 14.04" +## [28] "Debian 7.0 x64" +## [29] "Debian 7.0 x32" +## [30] "CentOS 6.5 x64" +## [31] "CentOS 6.5 x32" +## [32] "Docker 0.11.1 on Ubuntu 13.10 x64" +## [33] "Django on Ubuntu 14.04" +## [34] "LAMP on Ubuntu 14.04" +## [35] "node-v0.10.28 on Ubuntu 14.04" +## [36] "GitLab 6.9.0 CE" +``` + +List available sizes + + +```r +do.call(rbind, do_sizes()$sizes) +``` + +``` +## id name slug memory cpu disk cost_per_hour cost_per_month +## [1,] 66 "512MB" "512mb" 512 1 20 0.00744 "5.0" +## [2,] 63 "1GB" "1gb" 1024 1 30 0.01488 "10.0" +## [3,] 62 "2GB" "2gb" 2048 2 40 0.02976 "20.0" +## [4,] 64 "4GB" "4gb" 4096 2 60 0.05952 "40.0" +## [5,] 65 "8GB" "8gb" 8192 4 80 0.1191 "80.0" +## [6,] 61 "16GB" "16gb" 16384 8 160 0.2381 
"160.0" +## [7,] 60 "32GB" "32gb" 32768 12 320 0.4762 "320.0" +## [8,] 70 "48GB" "48gb" 49152 16 480 0.7143 "480.0" +## [9,] 69 "64GB" "64gb" 65536 20 640 0.9524 "640.0" +``` + +Let's create a droplet: + + +```r +(res <- do_droplets_new(name="foo", size_slug = '512mb', image_slug = 'ubuntu-14-04-x64', region_slug = 'sfo1', ssh_key_ids = 89103)) +``` + +```r +$status +[1] "OK" + +$droplet +$droplet$id +[1] 1733336 + +$droplet$name +[1] "foo" + +$droplet$image_id +[1] 3240036 + +$droplet$size_id +[1] 66 + +$droplet$event_id +[1] 25278892 + + +attr(,"class") +[1] "dodroplet" +``` + + + +List my droplets + + +```r +do_droplets_get() +``` + +``` +## $status +## [1] "OK" +## +## $droplets +## $droplets[[1]] +## $droplets[[1]]$id +## [1] 1733336 +## +## $droplets[[1]]$name +## [1] "foo" +## +## $droplets[[1]]$image_id +## [1] 3240036 +## +## $droplets[[1]]$size_id +## [1] 66 +## +## $droplets[[1]]$region_id +## [1] 3 +## +## $droplets[[1]]$backups_active +## [1] FALSE +## +## $droplets[[1]]$ip_address +## [1] "107.170.211.252" +## +## $droplets[[1]]$private_ip_address +## NULL +## +## $droplets[[1]]$locked +## [1] FALSE +## +## $droplets[[1]]$status +## [1] "active" +## +## $droplets[[1]]$created_at +## [1] "2014-05-28T05:59:22Z" +``` + +Cool, we have a new Linux box with 512 mb RAM, running Ubuntu 14.04 in the SF region. Notice that I'm using my SSH key here. If you don't use your SSH key, Digital Ocean will email you a password, which you then use. We just have to wait a bit (sometimes 20 seconds, sometimes a few minutes) for it to spin up. + +Now we can install stuff. Here, I'll install R, and RStudio Server. This step prints out the progress as you would see if you did `ssh` into the box itself outside of R. The RStudio Server instance will pop up in your default browser when this operation is done. + + +```r +do_install(res$droplet$id, what='rstudio', usr='hey', pwd='there') +``` + + + +You can install some things like the `libcurl` and `libxml` libraries too with the `deps` parameter. + +When you're done, you can destroy your droplet from R too + + +```r +do_droplets_destroy(res$droplet$id) +``` + +``` +## $status +## [1] "OK" +## +## $event_id +## [1] 25279124 +``` + +Let me know if you have any thoughts :) + +
    +

    @@ -68,119 +298,148 @@

    -

    Someone asked about plotting something like this today

    - -

    I wrote a few functions previously to do something like this. However, since then ggplot2 has changed, and one of the functions no longer works.

    - -

    Hence, I fixed opts() to theme(), theme_blank() to element_blank(), and panel.background = element_blank() to plot.background = element_blank() to get the histograms to show up with the line plot and not cover it.

    - -

    The new functions:

    -
    loghistplot  <- function(data) {
    -  names(data) <- c('x','y') # rename columns
    -
    -  # get min and max axis values
    -  min_x <- min(data$x)
    -  max_x <- max(data$x)
    -  min_y <- min(data$y)
    -  max_y <- max(data$y)
    -
    -  # get bin numbers
    -  bin_no <- max(hist(data$x, plot = FALSE)$counts) + 5
    -
    -  # create plots
    -  a <- ggplot(data, aes(x = x, y = y)) +
    -    theme_bw(base_size=16) +
    -    geom_smooth(method = "glm", family = "binomial", se = TRUE,
    -                colour='black', size=1.5, alpha = 0.3) +
    -    scale_x_continuous(limits=c(min_x,max_x)) +
    -    theme(panel.grid.major = element_blank(),
    -          panel.grid.minor=element_blank(),
    -          panel.background = element_blank(),
    -          plot.background = element_blank()) +
    -    labs(y = "Probability\n", x = "\nYour X Variable")
    -
    -  theme_loghist <- list(
    -    theme(panel.grid.major = element_blank(),
    -          panel.grid.minor=element_blank(),
    -          axis.text.y = element_blank(),
    -          axis.text.x = element_blank(),
    -          axis.ticks = element_blank(),
    -          panel.border = element_blank(),
    -          panel.background = element_blank(),
    -          plot.background = element_blank())
    -  )
    -
    -  b <-
    -  ggplot(data[data$y == unique(data$y)[1], ], aes(x = x)) +
    -    theme_bw(base_size=16) +
    -    geom_histogram(fill = "grey") +
    -    scale_y_continuous(limits=c(0,bin_no)) +
    -    scale_x_continuous(limits=c(min_x,max_x)) +
    -    theme_loghist +
    -    labs(y='\n', x='\n')
    -
    -  c <- ggplot(data[data$y == unique(data$y)[2], ], aes(x = x)) +
    -    theme_bw(base_size=16) +
    -    geom_histogram(fill = "grey") +
    -    scale_y_continuous(trans='reverse', limits=c(bin_no,0)) +
    -    scale_x_continuous(limits=c(min_x,max_x)) +
    -    theme_loghist +
    -    labs(y='\n', x='\n')
    -
    -  grid.newpage()
    -  pushViewport(viewport(layout = grid.layout(1,1)))
    -
    -  vpa_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.5)
    -  vpb_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.5)
    -  vpc_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.5)
    -
    -  print(b, vp = vpb_)
    -  print(c, vp = vpc_)
    -  print(a, vp = vpa_)
    -}
    -
    logpointplot  <- function(data) {
    -  names(data) <- c('x','y') # rename columns
    -
    -  # get min and max axis values
    -  min_x <- min(data$x)
    -  max_x <- max(data$x)
    -  min_y <- min(data$y)
    -  max_y <- max(data$y)
    -
    -  # create plots
    -  ggplot(data, aes(x = x, y = y)) +
    -    theme_bw(base_size=16) +
    -    geom_point(size = 3, alpha = 0.5, position = position_jitter(w=0, h=0.02)) +
    -    geom_smooth(method = "glm", family = "binomial", se = TRUE,
    -                colour='black', size=1.5, alpha = 0.3) +
    -    scale_x_continuous(limits=c(min_x,max_x)) +
    -    theme(panel.grid.major = element_blank(),
    -          panel.grid.minor=element_blank(),
    -          panel.background = element_blank()) +
    -    labs(y = "Probability\n", x = "\nYour X Variable")
    -
    -}
    -
    -

    Install ggplot2 and gridExtra if you don't have them:

    -
    install.packages(c("ggplot2","gridExtra"), repos = "http://cran.rstudio.com")
    -
    -

    And their use:

    - -

    Logistic histogram plots

    -
    loghistplot(data=mtcars[,c("mpg","vs")])
    -
    -

    plot of chunk unnamed-chunk-5

    -
    loghistplot(movies[,c("rating","Action")])
    -
    -

    plot of chunk unnamed-chunk-6

    - -

    Logistic point plots

    -
    loghistplot(data=mtcars[,c("mpg","vs")])
    -
    -

    plot of chunk unnamed-chunk-7

    -
    loghistplot(movies[,c("rating","Action")])
    -
    -

    plot of chunk unnamed-chunk-8

    + Someone asked about plotting something like this today + +I [wrote a few functions previously](http://recology.info/2012/01/logistic-regression-barplot-fig/) to do something like this. However, since then `ggplot2` has changed, and one of the functions no longer works. + +Hence, I fixed `opts()` to `theme()`, `theme_blank()` to `element_blank()`, and `panel.background = element_blank()` to `plot.background = element_blank()` to get the histograms to show up with the line plot and not cover it. + +The new functions: + + +```r +loghistplot <- function(data) { + names(data) <- c('x','y') # rename columns + + # get min and max axis values + min_x <- min(data$x) + max_x <- max(data$x) + min_y <- min(data$y) + max_y <- max(data$y) + + # get bin numbers + bin_no <- max(hist(data$x, plot = FALSE)$counts) + 5 + + # create plots + a <- ggplot(data, aes(x = x, y = y)) + + theme_bw(base_size=16) + + geom_smooth(method = "glm", family = "binomial", se = TRUE, + colour='black', size=1.5, alpha = 0.3) + + scale_x_continuous(limits=c(min_x,max_x)) + + theme(panel.grid.major = element_blank(), + panel.grid.minor=element_blank(), + panel.background = element_blank(), + plot.background = element_blank()) + + labs(y = "Probability\n", x = "\nYour X Variable") + + theme_loghist <- list( + theme(panel.grid.major = element_blank(), + panel.grid.minor=element_blank(), + axis.text.y = element_blank(), + axis.text.x = element_blank(), + axis.ticks = element_blank(), + panel.border = element_blank(), + panel.background = element_blank(), + plot.background = element_blank()) + ) + + b <- + ggplot(data[data$y == unique(data$y)[1], ], aes(x = x)) + + theme_bw(base_size=16) + + geom_histogram(fill = "grey") + + scale_y_continuous(limits=c(0,bin_no)) + + scale_x_continuous(limits=c(min_x,max_x)) + + theme_loghist + + labs(y='\n', x='\n') + + c <- ggplot(data[data$y == unique(data$y)[2], ], aes(x = x)) + + theme_bw(base_size=16) + + geom_histogram(fill = "grey") + + scale_y_continuous(trans='reverse', limits=c(bin_no,0)) + + scale_x_continuous(limits=c(min_x,max_x)) + + theme_loghist + + labs(y='\n', x='\n') + + grid.newpage() + pushViewport(viewport(layout = grid.layout(1,1))) + + vpa_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.5) + vpb_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.5) + vpc_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.5) + + print(b, vp = vpb_) + print(c, vp = vpc_) + print(a, vp = vpa_) +} +``` + + +```r +logpointplot <- function(data) { + names(data) <- c('x','y') # rename columns + + # get min and max axis values + min_x <- min(data$x) + max_x <- max(data$x) + min_y <- min(data$y) + max_y <- max(data$y) + + # create plots + ggplot(data, aes(x = x, y = y)) + + theme_bw(base_size=16) + + geom_point(size = 3, alpha = 0.5, position = position_jitter(w=0, h=0.02)) + + geom_smooth(method = "glm", family = "binomial", se = TRUE, + colour='black', size=1.5, alpha = 0.3) + + scale_x_continuous(limits=c(min_x,max_x)) + + theme(panel.grid.major = element_blank(), + panel.grid.minor=element_blank(), + panel.background = element_blank()) + + labs(y = "Probability\n", x = "\nYour X Variable") + +} +``` + +Install `ggplot2` and `gridExtra` if you don't have them: + + +```r +install.packages(c("ggplot2","gridExtra"), repos = "http://cran.rstudio.com") +``` + +And their use: + +__Logistic histogram plots__ + + +```r +loghistplot(data=mtcars[,c("mpg","vs")]) +``` + +![plot of chunk unnamed-chunk-5](/public/img/2014-05-22-logplotreboot/unnamed-chunk-5.png) + + +```r 
+loghistplot(movies[,c("rating","Action")])
+```
+
+![plot of chunk unnamed-chunk-6](/public/img/2014-05-22-logplotreboot/unnamed-chunk-6.png)
+
+
+__Logistic point plots__
+
+
+```r
+logpointplot(data=mtcars[,c("mpg","vs")])
+```
+
+![plot of chunk unnamed-chunk-7](/public/img/2014-05-22-logplotreboot/unnamed-chunk-7.png)
+
+
+```r
+logpointplot(movies[,c("rating","Action")])
+```
+
+![plot of chunk unnamed-chunk-8](/public/img/2014-05-22-logplotreboot/unnamed-chunk-8.png)
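+
+One setup note (an assumption about your session rather than something stated above): the two functions call `ggplot()` and the grid viewport functions directly, so attach the packages before sourcing and using them, e.g.:
+
+
+```r
+library(ggplot2)
+library(gridExtra)
+library(grid)  # grid.newpage(), viewport(), pushViewport()
+
+loghistplot(mtcars[, c("mpg", "vs")])
+logpointplot(mtcars[, c("mpg", "vs")])
+```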

    @@ -193,32 +452,43 @@

    -

    The history

    + ## The history -

    Cowsay is a terminal program that generates ascii pictures of a cow saying what you tell the cow to say in a bubble. See the Wikipedia page for more information: http://en.wikipedia.org/wiki/Cowsay.

    +Cowsay is a terminal program that generates ascii pictures of a cow saying what you tell the cow to say in a bubble. See the Wikipedia page for more information: [http://en.wikipedia.org/wiki/Cowsay](http://en.wikipedia.org/wiki/Cowsay). -

    Install cowsay to use in your terminal (on OSX):

    -
    brew update
    +Install cowsay to use in your terminal (on OSX):
    +
    +```
    +brew update
     brew install cowsay
    -
    -

    Type cowsay hello world!, and you get:

    -
     ______________
    -< hello world! >
    +```
    +
    +Type `cowsay hello world!`, and you get:
    +
    +```
    + ______________
    +< hello world! >
      --------------
             \   ^__^
              \  (oo)\_______
                 (__)\       )\/\
                     ||----w |
                     ||     ||
    -
    -

    Optionally, you can install fortune to get pseudorandom messages from a database of quotations. On OSX do brew install fortune, then you can pipe a fortune quote to cowsay:

    -
    fortune | cowsay
    -
    -

    And get something like:

    -
     ______________________________________
    -/ "To take a significant step forward, \
    +```
    +
    +Optionally, you can install [fortune](http://en.wikipedia.org/wiki/Fortune_(Unix)) to get pseudorandom messages from a database of quotations. On OSX do `brew install fortune`, then you can pipe a fortune quote to `cowsay`:
    +
    +```
    +fortune | cowsay
    +```
    +
    +And get something like:
    +
    +```
    + ______________________________________
    +/ "To take a significant step forward, \
     | you must make a series of finite     |
    -| improvements." -- Donald J. Atwood,  |
    +| improvements." -- Donald J. Atwood,  |
     \ General Motors                       /
      --------------------------------------
             \   ^__^
    @@ -226,37 +496,51 @@ 

    The history

    (__)\ )\/\ ||----w | || || -
    -

    You can also get different animals. Try cowsay -f tux <yourmessage>

    +``` + +You can also get different animals. Try `cowsay -f tux ` + +## Cowsay in R + +Why cowsay for R? Why not. You never know what you will learn in fun side projects. Basically, this cowsay R package we are making prints messages that you pass to the function `say`. There are three arguments to the `say` function: + +* __what__: What do you want to say? You can pass it a custom message, anything you want, like _what's up_, or _howdy!_. You can also get R's version of fortunes, quotes about R. Just pass the exact term _forture_. If you want a fact about cats from the [Cat Facts API](http://catfacts-api.appspot.com/), pass in _catfact_. Last, you can get the current time by passing _time_ to this parameter. +* __by__: Type of animal, one of cow, chicken, poop, cat, ant, pumpkin, ghost, spider, rabbit, pig, snowman, or frog. If you want more animals, send a pull request, or ask and at some point it will be added. +* __type__: One of message (default), warning, or string (returns string). You could use string to pass into other functions, etc., instead of printing a warning or message. + +There are three other contributors so far (a big thanks to them): -

    Cowsay in R

    +* Tyler Rinker +* Thomas Leeper +* Noam Ross -

    Why cowsay for R? Why not. You never know what you will learn in fun side projects. Basically, this cowsay R package we are making prints messages that you pass to the function say. There are three arguments to the say function:

    +### Installation -
      -
    • what: What do you want to say? You can pass it a custom message, anything you want, like what's up, or howdy!. You can also get R's version of fortunes, quotes about R. Just pass the exact term forture. If you want a fact about cats from the Cat Facts API, pass in catfact. Last, you can get the current time by passing time to this parameter.
    • -
    • by: Type of animal, one of cow, chicken, poop, cat, ant, pumpkin, ghost, spider, rabbit, pig, snowman, or frog. If you want more animals, send a pull request, or ask and at some point it will be added.
    • -
    • type: One of message (default), warning, or string (returns string). You could use string to pass into other functions, etc., instead of printing a warning or message.
    • -
    -

    There are three other contributors so far (a big thanks to them):

    +```r +library(devtools) +install_github("cowsay", "sckott") +``` -
      -
    • Tyler Rinker
    • -
    • Thomas Leeper
    • -
    • Noam Ross
    • -
    -

    Installation

    -
    library(devtools)
    -install_github("cowsay", "sckott")
    -
    library(cowsay)
    -
    -

    p.s. or install_github("sckott/cowsay") if you have a newer version of devtools

    -

    Get time

    -
    say("time")
    -
     ----- 
    +```r
    +library(cowsay)
    +```
    +
    +
    +p.s. or `install_github("sckott/cowsay")` if you have a newer version of devtools
    +
    +### Get time
    +
    +
    +```r
    +say("time")
    +```
    +
    +```
    +
    + ----- 
      2014-02-20 14:15:35 
      ------ 
         \   ^__^ 
    @@ -264,27 +548,47 @@ 

    Get time

    (__)\ )\ /\ ||------w| || || -
    say("time", "chicken")
    -
     ----- 
    +```
    +
    +
    +
    +```r
    +say("time", "chicken")
    +```
    +
    +```
    +
    +
    + ----- 
      2014-02-20 14:15:35 
      ------ 
         \   
          \  
              _
            _/ }
    -      `>' \
    +      `>' \
           `|   \
    -       |   /'-.     .-.
    -        \'     ';`--' .'
    -         \'.    `'-./
    -          '.`-..-;`
    -            `;-..'
    +       |   /'-.     .-.
    +        \'     ';`--' .'
    +         \'.    `'-./
    +          '.`-..-;`
    +            `;-..'
                 _| _|
                 /` /`
    -
    -

    Vary type of output, default calls message

    -
    say("hello world")
    -
     ----- 
    +  
    +```
    +
    +
    +### Vary type of output, default calls message
    +
    +
    +```r
    +say("hello world")
    +```
    +
    +```
    +
    + ----- 
      hello world 
      ------ 
         \   ^__^ 
    @@ -292,8 +596,16 @@ 

    Vary type of output, default calls message

    (__)\ )\ /\ ||------w| || || -
    say("hello world", type = "warning")
    -
    Warning: 
    +```
    +
    +
    +
    +```r
    +say("hello world", type = "warning")
    +```
    +
    +```
    +Warning: 
      ----- 
      hello world 
      ------ 
    @@ -302,14 +614,32 @@ 

    Vary type of output, default calls message

    (__)\ )\ /\ ||------w| || || -
    say("hello world", type = "string")
    -
    [1] "\n ----- \n hello world \n ------ \n    \\   ^__^ \n     \\  (oo)\\ ________ \n        (__)\\         )\\ /\\ \n             ||------w|\n             ||      ||"
    -
    -

    Catfacts!!!!

    - -

    From the catfacts API, we can get random cat facts. If you put in catfact you by default get a cat saying it.

    -
    say("catfact", "cat")
    -
     ----- 
    +```
    +
    +
    +
    +```r
    +say("hello world", type = "string")
    +```
    +
    +```
    +[1] "\n ----- \n hello world \n ------ \n    \\   ^__^ \n     \\  (oo)\\ ________ \n        (__)\\         )\\ /\\ \n             ||------w|\n             ||      ||"
    +```
    +
    +
    +### Catfacts!!!!
    +
+From the [catfacts API](http://catfacts-api.appspot.com/), we can get random cat facts. If you pass in _catfact_, by default you get a cat saying the fact. 
    +
    +
    +```r
    +say("catfact", "cat")
    +```
    +
    +```
    +
    +
    + ----- 
      Neutering a cat extends its life span by two or three years. 
      ------ 
         \   
    @@ -317,21 +647,31 @@ 

    Catfacts!!!!

    \`*-. ) _`-. . : `. . - : _ ' + : _ ' ; *` _. `*-._ - `-.-' `-. + `-.-' `-. ; ` `. :. . \ - .\ . : .-' . - ' `+.; ; ' : - : ' | ; ;-. - ; ' : :`-: _.`* ; - .*' / .*' ; .*`- +' `*' - `*-* `*-* `*-*' -
    -

    R fortunes

    -
    say("fortune")
    -
     ----- 
    +                  .\  .   :   .-'   .   
    +                  '  `+.;  ;  '      :   
    +                  :  '  |    ;       ;-. 
    +                  ; '   : :`-:     _.`* ;
    +               .*' /  .*' ; .*`- +'  `*' 
    +               `*-*   `*-*  `*-*'        
    +    
    +```
    +
    +
    +### R fortunes
    +
    +
    +```r
    +say("fortune")
    +```
    +
    +```
    +
    + ----- 
      If I were to be treated by a cure created by stepwise regression, I would prefer voodoo.
      Dieter Menne
      in a thread about regressions with many variables
    @@ -343,8 +683,18 @@ 

    R fortunes

    (__)\ )\ /\ ||------w| || || -
    say("fortune", "pig")
    -
     ----- 
    +```
    +
    +
    +
    +```r
    +say("fortune", "pig")
    +```
    +
    +```
    +
    +
    + ----- 
      Cross posting is sociopathic.
      Roger Koenker
      NA
    @@ -355,21 +705,36 @@ 

    R fortunes

    \ _//| .-~~~-. _/oo } }-@ - ('')_ } | - `--'| { }--{ } + ('')_ } | + `--'| { }--{ } //_/ /_/ -
    -

    Incorporate into a function

    - -

    Define a function

    -
    foo <- function(x) {
    -    if (x < 5) 
    -        say("woops, x should be 5 or greater")
    -}
    -
    -

    Call the function, with an error on purpose

    -
    foo(3)
    -
     ----- 
    +  
    +```
    +
    +
    +### Incorporate into a function
    +
    +Define a function
    +
    +
    +```r
    +foo <- function(x) {
    +    if (x < 5) 
    +        say("woops, x should be 5 or greater")
    +}
    +```
    +
    +
    +Call the function, with an error on purpose
    +
    +
    +```r
    +foo(3)
    +```
    +
    +```
    +
    + ----- 
      woops, x should be 5 or greater 
      ------ 
         \   ^__^ 
    @@ -377,172 +742,46 @@ 

    Incorporate into a function

    (__)\ )\ /\ ||------w| || || -
    -

    Or capture a warning or message and pass to the say function

    -
    foo2 <- function(x) {
    -    err <- tryCatch(x^2, error = function(e) e)
    -    say(err$message, "frog")
    -}
    -
    -

    Then call the function

    -
    foo2("hello")
    -
     ----- 
    +```
    +
    +
    +Or capture a warning or message and pass to the `say` function
    +
    +
    +```r
    +foo2 <- function(x) {
    +    err <- tryCatch(x^2, error = function(e) e)
    +    say(err$message, "frog")
    +}
    +```
    +
    +
    +Then call the function 
    +
    +
    +```r
    +foo2("hello")
    +```
    +
    +```
    +
    +
    + ----- 
      non-numeric argument to binary operator 
      ------ 
         \   
          \  
             (.)_(.)
          _ (   _   ) _
    -    / \/`-----'\/ \
    +    / \/`-----'\/ \
       __\ ( (     ) ) /__
       )   /\ \._./ /\   (
        )_/ /|\   /|\ \_(
    -
    -

    Awesome. Much better to have an error message from a frog than just the harsh console alone :)

    - -
    -
    -

    - - cites - citation stuff from the command line - -

    +``` - - -

    I've been learning Ruby, and decided to scratch an itch: getting citations for papers to put in a bibtex file or my Zotero library. This usually requires two parts: 1) searching for an article with keywords, and then 2) getting the citation once the paper is found. Since I am lazy, I would prefer to do this from the command line instead of opening up a browser. Thus => cites. (Note, I'm sure someone has created something better - the point is I'm learnin' me some Ruby) -

    -cites does two things:

    - - - -

    Each of the two above tasks are functions that you can use within Ruby, and are available from the command line/terminal so that you don't have to spin up Ruby. During a typical writing workflow (in which you are using bibtex formatted references) one can want a citation for their paper, and instead of opening up a browser and using Google Scholar or Web of Science, etc., you can quickly search in your terminal by doing e.g., thor cite:search 'keywords that will help find the paper, including author, year, etc.'. Which if matches will give you a DOI. Then you can do thor cite:getcite DOI/string | pbcopy and you get the bibtex reference in your clipboard. Then just paste into your bibtex file or references manager. See more examples below. -

    -First, we need to install dependencies

    -
    gem install httparty bibtex-ruby launchy
    -sudo gem install thor
    -
    -

    Then clone the repo down. The Makefile in the repo builds the gem, and installs the Thor module so you have access to it from anywhere. If you don't want the Thor commands, just do make install and just the gem will be installed.

    -
    git clone git@github.com:sckott/cites.git
    -cd cites
    -make
    -
    -



    - -

    From the command line: Thor

    - -

    I decided to use Thor to make functions within cites available on the cli. Thor is cool. For example, you can list the commands available like

    -
    thor list
    -
    cite
    ------
    -thor cite:getcite        # Get a citation from a DOI
    -thor cite:launch paper   # Open a paper from a given DOI in your default browser
    -thor cite:search STRING  # Get a DOI from a search string
    -
    -

    Get help for a particular method

    -
    thor help cite:getcite
    -
    Usage:
    -  thor cite:getcite
    -
    -Options:
    -  [--format=FORMAT]
    -                     # Default: text
    -  [--style=STYLE]
    -                     # Default: apa
    -  [--locale=LOCALE]
    -                     # Default: en-US
    -
    -Get a citation from a DOI
    -
    -

    This is what's associated with cites from the cli using Thor.

    - -

    Other commands are available, just type thor on the cli, and press enter. -

    - -

    Search for a paper

    - -

    From the CLI

    -
    thor cite:search 'Piwowar sharing data increases citation PLOS'
    -
    {"match"=>true, "doi"=>"10.1371/journal.pone.0000308", "text"=>"Piwowar sharing data increases citation PLOS"}
    -
    -

    And you can do many searches, separated with commas, like

    -
    thor cite:search 'Piwowar sharing data increases citation PLOS,boettiger Modeling stabilizing selection'
    -
    -

    Search within Ruby

    -
    require 'cites'
    -Cites.search('Piwowar sharing data increases citation PLOS')
    -
    => [{"match"=>true,
    -  "doi"=>"10.1371/journal.pone.0000308",
    -  "text"=>"Piwowar sharing data increases citation PLOS"}]
    -
    -



    - -

    Get a reference from a DOI

    - -

    From the CLI, default output is text format, apa style, locale en-US

    -
    thor cite:getcite '10.1186/1471-2105-14-16'
    -
    Boyle, B., Hopkins, N., Lu, Z., Raygoza Garay, J. A., Mozzherin, D., Rees, T., Matasci, N., et al. (2013). The taxonomic name resolution service: an online tool for automated standardization of plant names. BMC Bioinformatics, 14(1), 16. Springer (Biomed Central Ltd.). doi:10.1186/1471-2105-14-16
    -
    -

    Because we're using thor you can pass in options to the call on the cli, like here choose ris for the format

    -
    thor cite:getcite '10.1371/journal.pone.0000308' --format=ris
    -
    TY  - JOUR
    -T2  - PLoS ONE
    -AU  - Piwowar, Heather A.
    -AU  - Day, Roger S.
    -AU  - Fridsma, Douglas B.
    -SN  - 1932-6203
    -TI  - Sharing Detailed Research Data Is Associated with Increased Citation Rate
    -SP  - e308
    -VL  - 2
    -PB  - Public Library of Science
    -DO  - 10.1371/journal.pone.0000308
    -PY  - 2007
    -UR  - http://dx.doi.org/10.1371/journal.pone.0000308
    -ER  -
    -
    -

    And here bibtex for the format

    -
    thor cite:getcite '10.1371/journal.pone.0000308' --format=bibtex
    -
    @article{Piwowar_Day_Fridsma_2007,
    -  title = {Sharing Detailed Research Data Is Associated with Increased Citation Rate},
    -  volume = {2},
    -  url = {http://dx.doi.org/10.1371/journal.pone.0000308},
    -  doi = {10.1371/journal.pone.0000308},
    -  number = {3},
    -  journal = {PLoS ONE},
    -  publisher = {Public Library of Science},
    -  author = {Piwowar, Heather A. and Day, Roger S. and Fridsma, Douglas B.},
    -  editor = {Ioannidis, JohnEditor},
    -  year = {2007},
    -  month = {mar},
    -  pages = {e308}
    -}
    -
    -

    Two more options, style and locale are only available with text format, like

    -
    thor cite:getcite '10.1371/journal.pone.0000308' --format=text --style=mla --locale=fr-FR
    -
    Piwowar, Heather A., Roger S. Day, et Douglas B. Fridsma. « Sharing Detailed Research Data Is Associated with Increased Citation Rate ». éd par. John Ioannidis. PLoS ONE 2.3 (2007): e308.
    -
    -

    Within Ruby

    -
    require 'cites'
    -Cites.doi2cit('10.1371/journal.pone.0000308')
    -
    => ["Piwowar, H. A., Day, R. S., & Fridsma, D. B. (2007). Sharing Detailed Research Data Is Associated with Increased Citation Rate. (J. Ioannidis, Ed.)PLoS ONE, 2(3), e308. Public Library of Science. doi:10.1371/journal.pone.0000308"]
    -
    -



    - -

    Open paper in browser

    - -

    Uses Macrodocs. The default, using Macrodocs, only works for open access (#OA) articles. You can set the option oa to be false.

    -
    thor cite:launch '10.1371/journal.pone.0000308'
    -
    -

    It's super simple, it just concatenates your DOI onto http://macrodocs.org/?doi= to give in this case http://macrodocs.org/?doi=10.1371/journal.pone.0000308 for what you will get from that command.

    - -

    When you don't have an open access article, set the oa option flag to false, like --oa=false

    -
    thor cite:launch '10.1111/1365-2745.12157' --oa=false
    -
    -

    Setting --oa=false simply concatenates your doi onto http://dx.doi.org/, which then attempts to resolve to likely the publishers page for the article.

    + +Awesome. Much better to have an error message from a frog than just the harsh console alone :)
    diff --git a/_site/page16/index.html b/_site/page16/index.html index 450acbbd8e..bbe6181395 100644 --- a/_site/page16/index.html +++ b/_site/page16/index.html @@ -59,6 +59,210 @@

    Recology

      +
    +

    + + cites - citation stuff from the command line + +

    + + + + I've been learning Ruby, and decided to scratch an itch: getting citations for papers to put in a bibtex file or my Zotero library. This usually requires two parts: 1) searching for an article with keywords, and then 2) getting the citation once the paper is found. Since I am lazy, I would prefer to do this from the command line instead of opening up a browser. Thus => `cites`. (Note, I'm sure someone has created something better - the point is I'm learnin' me some Ruby) +

+__cites does two things:__ + +* Search for a paper. Uses the [CrossRef Metadata Search API](http://search.crossref.org/help/api), which allows POST requests of free form text. +* Get a citation from a DOI. Uses the CrossRef [citation formatting service](http://labs.crossref.org/citation-formatting-service/) to search for citation information. + +Each of the two above tasks is a function that you can use within Ruby, and both are available from the command line/terminal so that you don't have to spin up Ruby. During a typical writing workflow (in which you are using bibtex formatted references) you may want a citation for a paper, and instead of opening up a browser and using Google Scholar or Web of Science, etc., you can quickly search in your terminal by doing e.g., `thor cite:search 'keywords that will help find the paper, including author, year, etc.'`. If there is a match, you get back a DOI. Then you can do `thor cite:getcite DOI/string | pbcopy` and you get the bibtex reference in your clipboard. Then just paste it into your bibtex file or reference manager. See more examples below. +

    +First, we need to install dependencies + +``` +gem install httparty bibtex-ruby launchy +sudo gem install thor +``` + +Then clone the repo down. The `Makefile` in the repo builds the gem, and installs the Thor module so you have access to it from anywhere. If you don't want the Thor commands, just do `make install` and just the gem will be installed. + +``` +git clone git@github.com:sckott/cites.git +cd cites +make +``` +

    +### From the command line: Thor + +I decided to use [Thor](http://whatisthor.com/) to make functions within `cites` available on the cli. Thor is cool. For example, you can list the commands available like + +``` +thor list +``` + +``` +cite +----- +thor cite:getcite # Get a citation from a DOI +thor cite:launch paper # Open a paper from a given DOI in your default browser +thor cite:search STRING # Get a DOI from a search string +``` + +Get help for a particular method + +``` +thor help cite:getcite +``` + +``` +Usage: + thor cite:getcite + +Options: + [--format=FORMAT] + # Default: text + [--style=STYLE] + # Default: apa + [--locale=LOCALE] + # Default: en-US + +Get a citation from a DOI +``` + +This is what's associated with `cites` from the cli using Thor. + +Other commands are available, just type `thor` on the cli, and press enter. +

    +### Search for a paper + +From the CLI + +``` +thor cite:search 'Piwowar sharing data increases citation PLOS' +``` + +``` +{"match"=>true, "doi"=>"10.1371/journal.pone.0000308", "text"=>"Piwowar sharing data increases citation PLOS"} +``` + +And you can do many searches, separated with commas, like + +``` +thor cite:search 'Piwowar sharing data increases citation PLOS,boettiger Modeling stabilizing selection' +``` + +Search within Ruby + +```ruby +require 'cites' +Cites.search('Piwowar sharing data increases citation PLOS') +``` + +```ruby +=> [{"match"=>true, + "doi"=>"10.1371/journal.pone.0000308", + "text"=>"Piwowar sharing data increases citation PLOS"}] +``` +

    +### Get a reference from a DOI + +From the CLI, default output is text format, apa style, locale en-US + +``` +thor cite:getcite '10.1186/1471-2105-14-16' +``` + +``` +Boyle, B., Hopkins, N., Lu, Z., Raygoza Garay, J. A., Mozzherin, D., Rees, T., Matasci, N., et al. (2013). The taxonomic name resolution service: an online tool for automated standardization of plant names. BMC Bioinformatics, 14(1), 16. Springer (Biomed Central Ltd.). doi:10.1186/1471-2105-14-16 +``` + +Because we're using [thor](http://whatisthor.com/) you can pass in options to the call on the cli, like here choose `ris` for the format + +``` +thor cite:getcite '10.1371/journal.pone.0000308' --format=ris +``` + +``` +TY - JOUR +T2 - PLoS ONE +AU - Piwowar, Heather A. +AU - Day, Roger S. +AU - Fridsma, Douglas B. +SN - 1932-6203 +TI - Sharing Detailed Research Data Is Associated with Increased Citation Rate +SP - e308 +VL - 2 +PB - Public Library of Science +DO - 10.1371/journal.pone.0000308 +PY - 2007 +UR - http://dx.doi.org/10.1371/journal.pone.0000308 +ER - +``` + +And here `bibtex` for the format + +``` +thor cite:getcite '10.1371/journal.pone.0000308' --format=bibtex +``` + +``` +@article{Piwowar_Day_Fridsma_2007, + title = {Sharing Detailed Research Data Is Associated with Increased Citation Rate}, + volume = {2}, + url = {http://dx.doi.org/10.1371/journal.pone.0000308}, + doi = {10.1371/journal.pone.0000308}, + number = {3}, + journal = {PLoS ONE}, + publisher = {Public Library of Science}, + author = {Piwowar, Heather A. and Day, Roger S. and Fridsma, Douglas B.}, + editor = {Ioannidis, JohnEditor}, + year = {2007}, + month = {mar}, + pages = {e308} +} +``` + +Two more options, `style` and `locale` are only available with text format, like + +``` +thor cite:getcite '10.1371/journal.pone.0000308' --format=text --style=mla --locale=fr-FR +``` + +``` +Piwowar, Heather A., Roger S. Day, et Douglas B. Fridsma. « Sharing Detailed Research Data Is Associated with Increased Citation Rate ». éd par. John Ioannidis. PLoS ONE 2.3 (2007): e308. +``` + +Within Ruby + +```ruby +require 'cites' +Cites.doi2cit('10.1371/journal.pone.0000308') +``` + +```ruby +=> ["Piwowar, H. A., Day, R. S., & Fridsma, D. B. (2007). Sharing Detailed Research Data Is Associated with Increased Citation Rate. (J. Ioannidis, Ed.)PLoS ONE, 2(3), e308. Public Library of Science. doi:10.1371/journal.pone.0000308"] +``` +

    +### Open paper in browser + +Uses [Macrodocs](http://macrodocs.org/). The default, using Macrodocs, only works for open access (#OA) articles. You can set the option `oa` to be false. + +``` +thor cite:launch '10.1371/journal.pone.0000308' +``` + +It's super simple, it just concatenates your DOI onto `http://macrodocs.org/?doi=` to give in this case [http://macrodocs.org/?doi=10.1371/journal.pone.0000308](http://macrodocs.org/?doi=10.1371/journal.pone.0000308) for what you will get from that command. + +When you don't have an open access article, set the oa option flag to false, like `--oa=false` + +``` +thor cite:launch '10.1111/1365-2745.12157' --oa=false +``` + +Setting `--oa=false` simply concatenates your doi onto `http://dx.doi.org/`, which then attempts to resolve to likely the publishers page for the article. + +
    +

    @@ -68,106 +272,128 @@

    -

    Gaug.es is a really nice looking analytics platform as an alternative to Google Analytics. It is a paid service, but not that expensive really.

+ [Gaug.es](http://get.gaug.es/) is a really nice looking analytics platform as an alternative to Google Analytics. It is a paid service, but not that expensive really. + +We've made an R package to interact with the Gaug.es API called `rgauges`. Find it [on Github](https://github.com/ropensci/rgauges) and [on CRAN](http://cran.r-project.org/web/packages/rgauges/index.html). + +Although working with the Gaug.es API is nice and easy, they don't keep hourly visit stats or provide them via the API, so you have to collect them yourself continually if you want them. That's what I have done for my own website. + +It took a few steps to get this data: + +* I wrote a little Ruby script using [Twelve gem](http://rubygems.org/gems/twelve) to collect data from the Gaug.es API every day at the same time, which just gets the past 24 hours of data. This script makes a call to the Gaug.es API and sends the data to a [CouchDB](http://couchdb.apache.org/) database hosted on [Cloudant](https://cloudant.com/). In reality, the script is embedded in a rack app as I don't think you can throw up a standalone script to Heroku. Here's the script: + +```ruby +class MyApp + require 'couchrest' + require 'twelve' + require 'date' + require 'time' + + def self.getgaugesdata_scott + bfg = Twelve.new('<gaugeskey>') + out = bfg.gauges('<gaugeskey>')['recent_hours'] + yip = { "from_url"=> "http://sckott.github.io/", "coll_date"=> Date.today.to_s, "coll_time"=> Time.now.utc.localtime.to_s, "recent_hours"=> out } + @db = CouchRest.database!("https://app16517180.heroku:<key>@app16517180.heroku.cloudant.com/gaugesdb_scott") + @db.save_doc(yip) + end + + def call env + [200, {"Content-Type" => "text/html"}, ["no output printed here"]] + end +end +``` -

    We've made an R package to interact with the Gaug.es API called rgauges. Find it on Github and on CRAN.

+* One little catch though: I run the Ruby script on Heroku, so I don't have to do it locally, but the free Heroku instance goes down unless it's doing something. So I used a little service called [UptimeRobot](http://uptimerobot.com/) to ping the Heroku app every 5 minutes. UptimeRobot also gives you uptime stats on your app, which I don't really need, but it's a cool feature. -

    Although working with the Gaug.es API is nice and easy, they don't keep hourly visit stats and provide those via the API, so that you have to continually collect them yourself if you want them. That's what I have done for my own website.

    +* And that's it. Now the data is stored from each day's collection of visitor stats to a free Cloudant CouchDB database. -

    It took a few steps to get this data:

    +## Regular Gaug.es data -
      -
    • I wrote a little Ruby script using Twelve gem to collect data from the Gaug.es API every day at the same time, which just gets the patst 24 hours of data. This script makes a call to the Gaug.es API and sends the data to a CouchDB database hosted on Cloudant. In reality, the script is is embeded in a rack app as I don't think you can throw up a standalone script to Heroku. Here's the script:
    • -
    -
    class MyApp
    -  require 'couchrest'
    -  require 'twelve'
    -  require 'date'
    -  require 'time'
    +First, let's look at what you can do with data that Gaug.es does give to you, using the `rgauges` R package.
     
    -  def self.getgaugesdata_scott
    -    bfg = Twelve.new('<gaugeskey>')
    -    out = bfg.gauges('<gaugeskey>')['recent_hours']
    -    yip = { "from_url"=> "http://sckott.github.io/", "coll_date"=> Date.today.to_s, "coll_time"=> Time.now.utc.localtime.to_s, "recent_hours"=> out }
    -    @db = CouchRest.database!("https://app16517180.heroku:<key>@app16517180.heroku.cloudant.com/gaugesdb_scott")
    -    @db.save_doc(yip)
    +********************
     
    -  def call env
    -    [200, {"Content-Type" => "text/html"}, ["no output printed here"]] 
    -  end
    -end
    -
    -
      -
    • One little catch though: I run the Ruby script on Heroku, so I don't have to do it locally, but Heroku free instance goes down unless it's doing something. So I used a little service called UptimeRobot to ping the Heroku app every 5 minutes. UptimeRobot also is giving you uptime stats too on your app, which I don't really need, but is a cool feature.

    • -
    • And that's it. Now the data is stored from each day's collection of visitor stats to a free Cloudant CouchDB database.

    • -
    +### Install rgauges -

    Regular Gaug.es data

    -

    First, let's look at what you can do with data that Gaug.es does give to you, using the rgauges R package.

    +{% highlight r %} +install.packages("devtools") +library(devtools) +install_github("rgauges", "ropensci") +{% endhighlight %} -
    -

    Install rgauges

    +### Load rgauges and other dependency libraries -
    install.packages("devtools")
    -library(devtools)
    -install_github("rgauges", "ropensci")
    -

    Load rgauges and other dependency libraries

    +{% highlight r %} +library(rgauges) +library(ggplot2) +{% endhighlight %} -
    library(rgauges)
    -library(ggplot2)
    -

    Your info

    +### Your info -
    gs_me()
    -
    ## $user
    +{% highlight r %}
    +gs_me()
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +## $user
     ## $user$name
    -## [1] "Scott Chamberlain"
    +## [1] "Scott Chamberlain"
     ## 
     ## $user$email
    -## [1] "myrmecocystus@gmail.com"
    +## [1] "myrmecocystus@gmail.com"
     ## 
     ## $user$id
    -## [1] "4eddbafb613f5d5139000001"
    +## [1] "4eddbafb613f5d5139000001"
     ## 
     ## $user$last_name
    -## [1] "Chamberlain"
    +## [1] "Chamberlain"
     ## 
     ## $user$urls
     ## $user$urls$self
    -## [1] "https://secure.gaug.es/me"
    +## [1] "https://secure.gaug.es/me"
     ## 
     ## $user$urls$clients
    -## [1] "https://secure.gaug.es/clients"
    +## [1] "https://secure.gaug.es/clients"
     ## 
     ## $user$urls$gauges
    -## [1] "https://secure.gaug.es/gauges"
    +## [1] "https://secure.gaug.es/gauges"
     ## 
     ## 
     ## $user$first_name
    -## [1] "Scott"
    +## [1] "Scott" +{% endhighlight %} + + +#### Traffic -

    Traffic

    -
    gs_traffic(id = "4efd83a6f5a1f5158a000004")
    +{% highlight r %} +gs_traffic(id = "4efd83a6f5a1f5158a000004") +{% endhighlight %} -
    ## $metadata
    +
    +
    +{% highlight text %}
    +## $metadata
     ## $metadata$views
     ## [1] 386
     ## 
     ## $metadata$urls
     ## $metadata$urls$older
    -## [1] "https://secure.gaug.es/gauges/4efd83a6f5a1f5158a000004/traffic?date=2013-12-01"
    +## [1] "https://secure.gaug.es/gauges/4efd83a6f5a1f5158a000004/traffic?date=2013-12-01"
     ## 
     ## $metadata$urls$newer
     ## NULL
     ## 
     ## 
     ## $metadata$date
    -## [1] "2014-01-17"
    +## [1] "2014-01-17"
     ## 
     ## $metadata$people
     ## [1] 208
    @@ -191,13 +417,21 @@ 

    Traffic

    ## 14 11 2014-01-14 9 ## 15 23 2014-01-15 16 ## 16 16 2014-01-16 14 -## 17 32 2014-01-17 25
    +## 17 32 2014-01-17 25 +{% endhighlight %} + + +### Screen/browser information + -

    Screen/browser information

    +{% highlight r %} +gs_reso(id = "4efd83a6f5a1f5158a000004") +{% endhighlight %} -
    gs_reso(id = "4efd83a6f5a1f5158a000004")
    -
    ## $browser_height
    +
    +{% highlight text %}
    +## $browser_height
     ##   views title
     ## 1   190   600
     ## 2    77   768
    @@ -225,125 +459,193 @@ 

    Screen/browser information

    ## 5 14 2000 ## 6 6 320 ## 7 6 480 -## 8 4 800
    +## 8 4 800 +{% endhighlight %} + + +### Visualize traffic data + +You'll need to load ggplot2 + -

    Visualize traffic data

    +{% highlight r %} +library(ggplot2) +out <- gs_gauge_detail(id = "4efd83a6f5a1f5158a000004") +vis_gauge(out) +{% endhighlight %} -

    You'll need to load ggplot2

    -
    library(ggplot2)
    -out <- gs_gauge_detail(id = "4efd83a6f5a1f5158a000004")
    -vis_gauge(out)
    -
    ## Using hour, time as id variables
    +{% highlight text %}
    +## Using hour, time as id variables
     ## Using date as id variables
    -## Using date as id variables
    +## Using date as id variables +{% endhighlight %} + +![center](/public/img/2014-01-17-rgauges-hourly/unnamed-chunk-4.png) + +{% highlight text %} +## NULL +{% endhighlight %} + -

    center

    +******************** +******************** -
    ## NULL
    +## Historic hourly Gaug.es data -
    +Now let's play with the hourly data. To do that we aren't going to use `rgauges`, but rather call the Cloudant API. CouchDB provides a RESTful API out of the box, so we can do a call like `https://app16517180.heroku.cloudant.com/gaugesdb_scott/_all_docs?limit=20` to get metadata (or other calls to get the documents themselves). (note: that url won't work for you since you don't have my login info) -
    +### Get some data -

    Historic hourly Gaug.es data

    -

    Now let's play with the hourly data. To do that we aren't going to use rgauges, but rather call the Cloudant API. CouchDB provides a RESTful API out of the box, so we can do a call like https://app16517180.heroku.cloudant.com/gaugesdb_scott/_all_docs?limit=20 to get metadata (or other calls to get the documents themselves). (note: that url won't work for you since you don't have my login info)

    +{% highlight r %} +library(devtools) +install_github("sckott/sofa") # or install_github('sofa', 'sckott') +{% endhighlight %} -

    Get some data

    -
    library(devtools)
    -install_github("sckott/sofa")  # or install_github('sofa', 'sckott')
    -
    library(sofa)
    -cloudant_name <- "app16517180.heroku"
    -cloudant_pwd <- getOption("sofa_cloudant_heroku")[[2]]
    -cushion(sofa_cloudant = c(cloudant_name, cloudant_pwd))
    -dat <- sofa_alldocs(cushion = "sofa_cloudant", dbname = "gaugesdb_scott", include_docs = "true")
    -

    Manipulate and visualize

    +{% highlight r %} +library(sofa) +cloudant_name <- "app16517180.heroku" +cloudant_pwd <- getOption("sofa_cloudant_heroku")[[2]] +cushion(sofa_cloudant = c(cloudant_name, cloudant_pwd)) +dat <- sofa_alldocs(cushion = "sofa_cloudant", dbname = "gaugesdb_scott", include_docs = "true") +{% endhighlight %} -
    library(plyr)
    -dates <- ldply(dat$rows, function(x) x$doc$coll_date)
    -min(dates$V1)
    -
    ## [1] "2013-06-26"
    +### Manipulate and visualize -
    max(dates$V1)
    -
    ## [1] "2014-01-16"
    +{% highlight r %} +library(plyr) +dates <- ldply(dat$rows, function(x) x$doc$coll_date) +min(dates$V1) +{% endhighlight %} -
    length(dates$V1)
    -
    ## [1] 198
    -

    So we've got 198 days of data, first collected near end of June, and most recent yesterday. Now get actual visits data

    +{% highlight text %} +## [1] "2013-06-26" +{% endhighlight %} -
    df <- ldply(dat$rows, function(x) {
    -    y <- do.call(rbind, lapply(x$doc$recent_hours, data.frame))
    -    data.frame(date = x$doc$coll_date, y)
    -})
    -df$date <- as.Date(df$date)
    -df$hour <- as.numeric(df$hour)
     
    -library(reshape2)
    -df_melt <- melt(df, id.vars = c("date", "hour"))
    -head(df_melt)
    -
    ##         date hour variable value
    +{% highlight r %}
    +max(dates$V1)
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +## [1] "2014-01-16"
    +{% endhighlight %}
    +
    +
    +
    +{% highlight r %}
    +length(dates$V1)
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +## [1] 198
    +{% endhighlight %}
    +
    +
+So we've got 198 days of data, the first collected near the end of June and the most recent yesterday. Now get the actual visits data
    +
    +
    +{% highlight r %}
    +df <- ldply(dat$rows, function(x) {
    +    y <- do.call(rbind, lapply(x$doc$recent_hours, data.frame))
    +    data.frame(date = x$doc$coll_date, y)
    +})
    +df$date <- as.Date(df$date)
    +df$hour <- as.numeric(df$hour)
    +
    +library(reshape2)
    +df_melt <- melt(df, id.vars = c("date", "hour"))
    +head(df_melt)
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +##         date hour variable value
     ## 1 2013-09-18    1    views     2
     ## 2 2013-09-18    2    views     3
     ## 3 2013-09-18    3    views     2
     ## 4 2013-09-18    4    views     0
     ## 5 2013-09-18    5    views     1
    -## 6 2013-09-18    6    views    10
+## 6 2013-09-18 6 views 10 +{% endhighlight %} + + +We need to combine the date and hour into one date-time string: + -

    We need to combine the date and hour in to one date time string:

    +{% highlight r %} +df_melt <- transform(df_melt, datetime = as.POSIXct(paste(date, sprintf("%s:00:00", + hour)))) +head(df_melt) +{% endhighlight %} -
    df_melt <- transform(df_melt, datetime = as.POSIXct(paste(date, sprintf("%s:00:00", 
    -    hour))))
    -head(df_melt)
    -
    ##         date hour variable value            datetime
    +
    +{% highlight text %}
    +##         date hour variable value            datetime
     ## 1 2013-09-18    1    views     2 2013-09-18 01:00:00
     ## 2 2013-09-18    2    views     3 2013-09-18 02:00:00
     ## 3 2013-09-18    3    views     2 2013-09-18 03:00:00
     ## 4 2013-09-18    4    views     0 2013-09-18 04:00:00
     ## 5 2013-09-18    5    views     1 2013-09-18 05:00:00
    -## 6 2013-09-18    6    views    10 2013-09-18 06:00:00
    +## 6 2013-09-18 6 views 10 2013-09-18 06:00:00 +{% endhighlight %} + + +Now plot all data + + +{% highlight r %} +library(ggplot2); library(scales) -

    Now plot all data

    +gauge_theme <- function(){ + list(theme(panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.position = c(0.85,0.85), + legend.key = element_blank())) +} -
    library(ggplot2); library(scales)
    +ggplot(df_melt, aes(datetime, value, group=variable, colour=variable)) +
    +    theme_bw(base_size=18) + 
    +    geom_line(size=2) +
    +    scale_color_brewer(name="", palette=3) +
    +    labs(x="", y="") +
    +    gauge_theme()
    +{% endhighlight %}
     
    -gauge_theme <- function(){
    -  list(theme(panel.grid.major = element_blank(),
    -             panel.grid.minor = element_blank(),
    -             legend.position = c(0.85,0.85),
    -             legend.key = element_blank()))
    -}
    +![center](/public/img/2014-01-17-rgauges-hourly/unnamed-chunk-8.png) 
     
    -ggplot(df_melt, aes(datetime, value, group=variable, colour=variable)) +
    -    theme_bw(base_size=18) + 
    -    geom_line(size=2) +
    -    scale_color_brewer(name="", palette=3) +
    -    labs(x="", y="") +
    -    gauge_theme()
    -

    center

    +And just one day -

    And just one day

    -
    oneday <- df_melt[ as.character(df_melt$date) %in% "2013-11-12", ]
    -ggplot(oneday, aes(datetime, value, group=variable, colour=variable)) +
    -    theme_bw(base_size=18) + 
    -    geom_line(size=2) +
    -    scale_color_brewer(name="", palette=3) +
    -    labs(x="", y="") +
    -    gauge_theme()
    +{% highlight r %} +oneday <- df_melt[ as.character(df_melt$date) %in% "2013-11-12", ] +ggplot(oneday, aes(datetime, value, group=variable, colour=variable)) + + theme_bw(base_size=18) + + geom_line(size=2) + + scale_color_brewer(name="", palette=3) + + labs(x="", y="") + + gauge_theme() +{% endhighlight %} + +![center](/public/img/2014-01-17-rgauges-hourly/unnamed-chunk-9.png) -

    center

    @@ -356,138 +658,117 @@

    -

    I started using Jekyll when I didn't really know HTML, CSS, or Ruby - so I've had to learn a lot - but using Jekyll has been a great learning experience for all those languages.

    - -

    I've tried to boil down steps to building a Jekyll site or blog to the minimal steps:

    - -



    + I started using Jekyll when I didn't really know HTML, CSS, or Ruby - so I've had to learn a lot - but using Jekyll has been a great learning experience for all those languages. -

    Install Jekyll

    +I've tried to boil down steps to building a Jekyll site or blog to the minimal steps: -
      -
    • Mac/Linux/Unix: +

      -
        -
      • Install dependencies: +### Install Jekyll -
      • -
      • Install Jekyll using RubyGems gem install jekyll (you may need to do sudo...)
      • -
      • If you're having trouble installing, see the troubleshooting page.
      • -
    • -
    • Windows: Jekyll doesn't officially support installation on Windows - follow these steps for a Windows install.
    • -
    ++ Mac/Linux/Unix: + + Install dependencies: + + [Ruby](http://www.ruby-lang.org/en/downloads/) + + [RubyGems](http://rubygems.org/pages/download) + + Install Jekyll using RubyGems `gem install jekyll` (you may need to do `sudo...`) + + If you're having trouble installing, see the [troubleshooting page](http://jekyllrb.com/docs/troubleshooting/). ++ Windows: Jekyll doesn't officially support installation on Windows - follow [these steps](http://www.madhur.co.in/blog/2011/09/01/runningjekyllwindows.html) for a Windows install. -



    +

    -

    Make a site

    +### Make a site -

    The easiest way to get started is by using the command jekyll new SITENAME - let's replace SITENAME with foobar for this example.

    +The easiest way to get started is by using the command `jekyll new SITENAME` - let's replace `SITENAME` with `foobar` for this example. -

    So we run jekyll new foobar, which gives us:

    +So we run `jekyll new foobar`, which gives us: -
    New jekyll site installed in /Users/scottmac2/foobar.
    +{% highlight bash %} +New jekyll site installed in /Users/scottmac2/foobar. +{% endhighlight %} -

    Go into that directory, and run

    +Go into that directory, and run -
    cd foobar
    -jekyll serve
    +{% highlight bash %} +cd foobar +jekyll serve +{% endhighlight %} -

    Which gives you the files and directories:

    +Which gives you the files and directories: -
    |
    ---|- _config.yml
    -  |- _posts
    -  |- css
    -  |- _layouts
    -  |- _site
    -  |- index.html
    +{% highlight bash %} +| +--|- _config.yml + |- _posts + |- css + |- _layouts + |- _site + |- index.html +{% endhighlight %} -

    Then point your browser to http://localhost:4000/. And you should see the following:

    +Then point your browser to [http://localhost:4000/](http://localhost:4000/). And you should see the following: -

    +![](http://f.cl.ly/items/2q322a2P3f2m2A3a3l0O/Screen%20Shot%202013-11-20%20at%209.54.21%20AM.png) -



    +

    -

    Write a new blog post

    +### Write a new blog post -

    We'll add a new file to the _posts folder.

    +We'll add a new file to the _posts folder. -
    ---
    +{% highlight bash %}
    +---
     layout: post
     title:  My second post
     date:   2013-11-20
     categories: jekyll programming R
     ---
     
    -My second blog post!
    - -

    Paste this in to a new file in the _posts folder, save as today's date (2013-11-20) plus the post name, which gives us 2013-11-20-second-post.md.

    +My second blog post! +{% endhighlight %} -



+Paste this into a new file in the `_posts` folder, and save it as today's date ({{ page.date | date: "%Y-%m-%d" }}) plus the post name, which gives us {{ page.date | date: "%Y-%m-%d" }}-second-post.md. -

    Deploying

    +

    -

    An obvious option given that Jekyll was built by Github, is to put it up on Github. Github has some instructions here. Here is my attempt at instructions:

    +### Deploying -
      -
    • If you don't have a Github account already, create one - it's free.
    • -
    • Set up Git. Github's help for this: https://help.github.com/articles/set-up-git
    • -
    • Creat a new repo on Github, with the same name as your repo on your machine, in this case foobar.
    • -
    • Make your new blog directory foobar a git repo by doing git init within the repo.
    • -
    • Add you files to be tracked by git via git add --all
    • -
    • Commit your changes by git commit -am 'new blog files added'
    • -
    • Make a gh-pages branch by doing git branch gh-pages.
    • -
    • Add link for your repo on Github: git remote add origin https://github.com/<yourgithubusername>/foobar.git
    • -
    • Push to Github using git push -u origin master
    • -
    +An obvious option given that Jekyll was built by Github, is to put it up on Github. Github has some instructions [here](http://jekyllrb.com/docs/github-pages/). Here is my attempt at instructions: -

    Github gives you one repo that you can name <yourgithubusername>.github.io that will be viewable at the URL http://<yourgithubusername>.github.io. You can have your blog/website on the master branch, and you don't need to create a gh-pages branch. But if you have your site in any other named repo, you will need the gh-pages branch. If you don't use a <yourgithubusername>.github.io repo, your site will be viewable at <yourgithubusername>.github.io/<reponame>, in this case <yourgithubusername>.github.io/foobar.

++ If you don't have a Github account already, [create one](https://help.github.com/articles/signing-up-for-a-new-github-account) - it's free. ++ Set up Git. Github's help for this: https://help.github.com/articles/set-up-git ++ Create a new repo on Github, with the same name as your repo on your machine, in this case `foobar`. ++ Make your new blog directory `foobar` a git repo by doing `git init` within the repo. ++ Add your files to be tracked by git via `git add --all` ++ Commit your changes by `git commit -am 'new blog files added'` ++ Make a `gh-pages` branch by doing `git branch gh-pages`. ++ Add a link for your repo on Github: `git remote add origin https://github.com/<yourgithubusername>/foobar.git` ++ Push to Github using `git push -u origin master` -

    Beginners take note: Instead of the command line, you could use a Git GUI, from Github (OSX, Windows), or others, e.g., GitBox.

+Github gives you one repo that you can name `<yourgithubusername>.github.io` that will be viewable at the URL `http://<yourgithubusername>.github.io`. You can have your blog/website on the master branch, and you don't need to create a `gh-pages` branch. But if you have your site in any other named repo, you will need the `gh-pages` branch. If you don't use a `<yourgithubusername>.github.io` repo, your site will be viewable at `<yourgithubusername>.github.io/<reponame>`, in this case `<yourgithubusername>.github.io/foobar`. -



    +*Beginners take note:* Instead of the command line, you could use a Git GUI, from Github ([OSX](http://mac.github.com/), [Windows](http://windows.github.com/)), or others, e.g., [GitBox](http://gitboxapp.com/). -

    Other info

    +

    -

    That's the basics of how to get started. Inevitably, you'll run into problems with various dependencies. The Jekyll site has a lot of documntation now, so go there for help - and see a roundup of links below.

    +### Other info -

    For inspiration, here are many examples of sites that use Jekyll: http://jekyllrb.com/docs/sites/. If you want to build off someone else's work, find one that provides their code.

    - -
    - -

    A roundup of links for building static sites with jekyll

    - - - -
    - -
    -

    - - Code display in scholarly journals - -

+That's the basics of how to get started. Inevitably, you'll run into problems with various dependencies. The [Jekyll site](http://jekyllrb.com/) has a lot of documentation now, so go there for help - and see a roundup of links below. - +For inspiration, here are many examples of sites that use Jekyll: http://jekyllrb.com/docs/sites/. If you want to build off someone else's work, find one that provides their code. -

    Code in journals, that is, code you would type to do some programmatic operation in say R or Python, is kind of a mess to say the least. Okay, so you can SEE code in papers, but code is not formatted in a way that facilites reuse. If an author in a paper writes out some code for software they create, or an analysis they do in the paper, wouldn't it be nice for a reader to be able to copy and paste that code directly into whatever environment that code should execute in, and actually work. Of course there is dependencies, etc. for that software to worry about, but here I am just concerned with the code formatting in articles. Code is displayed as an image in some cases (gasp!). Additionally, there's this thing called the internet, and we can use color, so let's highlight code already. At least in one of our recent rOpenSci papers in F1000 Research, they do use syntax highlighting - w00t!

    +---------------- -

    Carl Boettiger (@cboettig) and I disccused how frustrated we are with the state of code in papers, and started a Github gist, listing publishers/journals and how they display code. It lives here: https://gist.github.com/sckott/6787278.

    +A roundup of links for building static sites with jekyll -

    We have a start, but would like your help in filling this list out more. What are the code presentation practices for various publishers and journals? With a list of what currently happens, perhaps we can start to convince publishers to display code more appropriately, partly by pointing out that "XYZ publisher does it really well, why can't you?". I tried to record info in a standardized way across publishers...

    +* [http://net.tutsplus.com/tutorials/other/building-static-sites-with-jekyll/](http://net.tutsplus.com/tutorials/other/building-static-sites-with-jekyll/) +* [http://www.andrewmunsell.com/tutorials/jekyll-by-example/](http://www.andrewmunsell.com/tutorials/jekyll-by-example/) +* [Jekyll Bootstrap](http://jekyllbootstrap.com/) +* Jekyll thoughts by [Carl Boettiger](http://carlboettiger.info/index.html): [http://carlboettiger.info/2012/12/30/learning-jekyll.html](http://carlboettiger.info/2012/12/30/learning-jekyll.html) +* [http://danielmcgraw.com/2011/04/14/The-Ultimate-Guide-To-Getting-Started-With-Jekyll-Part-1/](http://danielmcgraw.com/2011/04/14/The-Ultimate-Guide-To-Getting-Started-With-Jekyll-Part-1/) +* [A book on building sites with Jekyll](http://mijingo.com/products/screencasts/static-websites-with-jekyll/) +* [http://yeswejekyll.com/](http://yeswejekyll.com/) +* [http://hellarobots.com/2012/01/06/blogging-with-jekyll-quickstart.html](http://hellarobots.com/2012/01/06/blogging-with-jekyll-quickstart.html) +* [http://www.sitepoint.com/zero-to-jekyll-in-20-minutes/](http://www.sitepoint.com/zero-to-jekyll-in-20-minutes/)
    diff --git a/_site/page17/index.html b/_site/page17/index.html index 7e209198c9..68ffd448f7 100644 --- a/_site/page17/index.html +++ b/_site/page17/index.html @@ -59,6 +59,23 @@

    Recology

      +
    +

    + + Code display in scholarly journals + +

+ + + + Code in journals, that is, code you would type to do some programmatic operation in say R or Python, is kind of a mess to say the least. Okay, so you can **SEE** code in papers, but code is not formatted in a way that facilitates reuse. If an author in a paper writes out some code for software they create, or an analysis they do in the paper, wouldn't it be nice for a reader to be able to copy and paste that code directly into whatever environment that code should execute in, and have it actually work? Of course there are dependencies, etc. for that software to worry about, but here I am just concerned with the code formatting in articles. Code is displayed as an image in some cases (gasp!). Additionally, there's this thing called the internet, and we can use color, so let's highlight code already. At least in one of our recent [rOpenSci](http://ropensci.org/) papers in F1000 Research, [they do use syntax highlighting](http://f1000research.com/articles/2-191/v1) - w00t! + +Carl Boettiger (@cboettig) and I discussed how frustrated we are with the state of code in papers, and started a Github gist, listing publishers/journals and how they display code. It lives here: [https://gist.github.com/sckott/6787278](https://gist.github.com/sckott/6787278). + +We have a start, but would like your help in filling this list out more. What are the code presentation practices for various publishers and journals? With a list of what currently happens, perhaps we can start to convince publishers to display code more appropriately, partly by pointing out that "XYZ publisher does it really well, why can't you?". I tried to record info in a standardized way across publishers... + +
    +

    @@ -68,34 +85,28 @@

    -

    Note: This is cross-posted from the rOpenSci blog, which will update with this post when our technical snafu is fixed.

    + *Note: This is cross-posted from the [rOpenSci blog](http://ropensci.org/blog), which will update with this post when our technical snafu is fixed.* -

    With the US government shut down, many of the federal government provided data APIs are down. We write R packages to interact with many of these APIs. We have been tweeting about what APIs that are down related to R pacakges we make, but we thought we would write up a proper blog post on the issue.

+With the US government shut down, many of the data APIs provided by the federal government are down. We write R packages to interact with many of these APIs. We have been tweeting about which of the APIs behind R packages we make are down, but we thought we would write up a proper blog post on the issue. -

    NCBI services are still up! NCBI is within NIH, which is within the Department of Health and Human Services. Here is the message on the NCBI page:

    +NCBI services are still up! NCBI is within NIH, which is within the Department of Health and Human Services. Here is the message on the NCBI page: -
    -

    The information on this web site remains accessible; but, due to the lapse in government funding, the information may not be up to date, and the agency may not be able to respond to inquiries until appropriations are enacted. For updates regarding government operating status see USA.gov.

    -
    +> The information on this web site remains accessible; but, due to the lapse in government funding, the information may not be up to date, and the agency may not be able to respond to inquiries until appropriations are enacted. For updates regarding government operating status see USA.gov. -

    Most USGS services are down. Some of the message on the USGS page (ITIS is under USGS, which is under the Department of the Interior):

    +Most USGS services are down. Some of the message on the USGS page (ITIS is under USGS, which is under the Department of the Interior): -
    -

    Due to the Federal government shutdown, usgs.gov and most associated web sites are unavailable. Only web sites necessary to protect lives and property will be maintained...

    -
    +> Due to the Federal government shutdown, usgs.gov and most associated web sites are unavailable. Only web sites necessary to protect lives and property will be maintained... -

    However, the USGS BISON service is still up for some reason - perhaps a different pot of money than other USGS projects?

    +However, the USGS BISON service is still up for some reason - perhaps a different pot of money than other USGS projects? -

    Some of the shutdown message from NOAA, under the Department of Commerce:

    +Some of the shutdown message from NOAA, under the Department of Commerce: -
    -

    Due to the Federal government shutdown, NOAA.gov and most associated web sites are unavailable. Specific NOAA web sites necessary to protect lives and property are operational and will be maintained.

    -
    +> Due to the Federal government shutdown, NOAA.gov and most associated web sites are unavailable. Specific NOAA web sites necessary to protect lives and property are operational and will be maintained. -

    Here's a table of APIs we interact with, the related R package, and any notes: +Here's a table of APIs we interact with, the related R package, and any notes:

    - + @@ -113,7 +124,7 @@

    - + @@ -128,26 +139,26 @@

    - -
    API provider API still up?No :( rnoaa
    USGS ITIS (Integrated Taxonomic Information Service) link No :( taxizeNo :( rpubmed

    + + + +

    +For those wanting to get NOAA climate data, perhaps check out the [RNCEP package][rncep]. -



    -For those wanting to get NOAA climate data, perhaps check out the RNCEP package.

    +For those using taxize, you can grab taxonomic IDs from NCBI using `get_uid()` rather than the ITIS version `get_tsn()`. With a UID from NCBI, you can do things like get a taxonomic classification using the function `classification()`. There are many non-government taxonomic sources in taxize, so you should be able to find what you need without ITIS. Other functions that use ITIS, and that you should avoid until the shutdown is over, are: -

    For those using taxize, you can grab taxonomic IDs from NCBI using get_uid() rather than the ITIS version get_tsn(). With a UID from NCBI, you can do things like get a taxonomic classification using the function classification(). There are many non-government taxonomic sources in taxize, so you should be able to find what you need without ITIS. Other functions that use ITIS, and that you should avoid until the shutdown is over, are:

* A long list carried over from the itis package that is now within taxize: `getacceptednamesfromtsn()`, `getanymatchcount()`, `getcommentdetailfromtsn()`, `getcommonnamesfromtsn()`, `getcoremetadatafromtsn()`, `getcoveragefromtsn()`, `getcredibilityratingfromtsn()`, `getcredibilityratings()`, `getcurrencyfromtsn()`, `getdatedatafromtsn()`, `getdescription()`, `getexpertsfromtsn()`, `getfullhierarchyfromtsn()`, `getfullrecordfromlsid()`, `getfullrecordfromtsn()`, `getgeographicdivisionsfromtsn()`, `getgeographicvalues()`, `getglobalspeciescompletenessfromtsn()`, `gethierarchydownfromtsn()`, `gethierarchyupfromtsn()`, `getitistermsfromcommonname()`, `getitistermsfromscientificname()`, `getjurisdictionaloriginfromtsn()`, `getjurisdictionoriginvalues()`, `getjurisdictionvalues()`, `getkingdomnamefromtsn()`, `getkingdomnames()`, `getlastchangedate()`, `getlsidfromtsn()`, `getothersourcesfromtsn()`, `getparenttsnfromtsn()`, `getpublicationsfromtsn()`, `getranknames()`, `getrecordfromlsid()`, `getreviewyearfromtsn()`, `getscientificnamefromtsn()`, `getsynonymnamesfromtsn()`, `gettaxonauthorshipfromtsn()`, `gettaxonomicranknamefromtsn()`, `gettaxonomicusagefromtsn()`, `gettsnbyvernacularlanguage()`, `gettsnfromlsid()`, `getunacceptabilityreasonfromtsn()`, `getvernacularlanguages()`, `searchbycommonname()`, `searchbycommonnamebeginswith()`, `searchbycommonnameendswith()`, `searchbyscientificname()`, `searchforanymatch()`, `searchforanymatchpaged()`
* `itis_acceptname()`
* `itis_downstream()`
* `itis_name()`
* `itis_taxrank()`
* In `tax_agg()`, only use db="ncbi"
* In `tax_name()`, only use db="ncbi"
* In `tax_rank()`, only use db="ncbi"
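As a rough illustration of the NCBI-based workflow mentioned above (a minimal sketch, assuming taxize is installed and NCBI is reachable; the species name is just an example):

{% highlight r %}
library(taxize)

# NCBI UID instead of an ITIS TSN
uid <- get_uid("Helianthus annuus")

# full taxonomic classification from NCBI
classification(uid)

# rank/name helpers also work if you stick to db = "ncbi"
tax_rank("Helianthus annuus", db = "ncbi")
{% endhighlight %}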
Let us know if you have any questions or comments.

    +[rncep]: http://cran.r-project.org/web/packages/RNCEP/index.html

    @@ -160,107 +171,103 @@

Eduard Szöcs and I started developing a taxonomic toolbelt for the R language a while back, which lets you interact with a multitude of taxonomic databases on the web. We have a paper in F1000Research if you want to find out more (see [here](http://f1000research.com/articles/2-191/v1)).

I thought it would be fun to rewrite some of taxize in other languages to learn more languages. Ruby and Python made the most sense to try. I did try others (Julia, Node), but gave up on those for now. The goal here isn't to port taxize to Python and Ruby right now - it's for me to learn myself some coding.

Anyway, here's use of the same function in three languages: R, Ruby, and Python. The function searches the [Global Names Index](http://gni.globalnames.org/), but is named slightly differently in R (`gni_search`) vs. Ruby/Python (`gniSearch`). (yes, I realize the package names aren't consistent)

Note that there are only a few functions available in the Ruby and Python versions:

* itisPing
* gnrResolve
* gniParse
* gniSearch
* gniDetails
* colChildren (Python, not Ruby)

And the behavior of these functions does not necessarily match that in the R version.

One thing I have learned is that packaging in R is much harder than in Python or Ruby. [devtools](http://cran.r-project.org/web/packages/devtools/index.html) does make R packaging easier, but still...
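For what it's worth, here's the kind of minimal devtools loop I mean (a sketch only; assumes devtools is installed, and "mypkg" is a placeholder package name):

{% highlight r %}
library(devtools)

create("mypkg")     # skeleton package directory
document("mypkg")   # build NAMESPACE and Rd files from roxygen comments
check("mypkg")      # run R CMD check
install("mypkg")    # install locally
{% endhighlight %}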



    -

### R

Code [here](https://github.com/ropensci/taxize_)

{% highlight r %}
install.packages("taxize")
library(taxize)
{% endhighlight %}

Then search for a taxonomic name

{% highlight r %}
out <- gni_search('Helianthus annuus')
out[1,]
{% endhighlight %}

{% highlight r %}
               name      id
1 Helianthus annuus 3329657
                                                                  lsid
1 urn:lsid:globalnames.org:index:18f9c244-a450-535e-adcd-2bfaf85c9b2e
                               uuid_hex resource_url
1 18f9c244-a450-535e-adcd-2bfaf85c9b2e         none
{% endhighlight %}

### Ruby

Code [here](https://github.com/sckott/tacksize)

{% highlight bash %}
git clone https://github.com/sckott/tacksize.git
cd tacksize
gem build tacksize.gemspec
gem install ./tacksize-0.0.1.gem
{% endhighlight %}

In a Ruby repl, like `irb`, search for a taxonomic name

{% highlight ruby %}
require 'tacksize'
out = Tacksize.gniSearch(:search_term => 'Helianthus annuus')
out[0]
{% endhighlight %}

{% highlight ruby %}
=> {"uuid_hex"=>"18f9c244-a450-535e-adcd-2bfaf85c9b2e", "name"=>"Helianthus annuus", "lsid"=>"urn:lsid:globalnames.org:index:18f9c244-a450-535e-adcd-2bfaf85c9b2e", "resource_uri"=>"http://gni.globalnames.org/name_strings/3329657.xml", "id"=>3329657}
{% endhighlight %}

### Python

Code [here](https://github.com/sckott/pytaxize)

{% highlight bash %}
git clone https://github.com/sckott/pytaxize.git
cd pytaxize
python setup.py install
{% endhighlight %}

In a Python repl, like `ipython`, search for a taxonomic name

{% highlight python %}
import pytaxize
out = pytaxize.gniSearch(name = 'Helianthus annuus')
out['name_strings'][0]
{% endhighlight %}

{% highlight python %}
{u'id': 3329657,
 u'lsid': u'urn:lsid:globalnames.org:index:18f9c244-a450-535e-adcd-2bfaf85c9b2e',
 u'name': u'Helianthus annuus',
 u'resource_uri': u'http://gni.globalnames.org/name_strings/3329657.xml',
 u'uuid_hex': u'18f9c244-a450-535e-adcd-2bfaf85c9b2e'}
{% endhighlight %}
    diff --git a/_site/page18/index.html b/_site/page18/index.html index 7a6df6d65b..d9a540a30a 100644 --- a/_site/page18/index.html +++ b/_site/page18/index.html @@ -59,6 +59,27 @@

    Recology

Pollinator niche breadth and natural enemies

I am on my way out of academia, so I want to share what I won't ever get around to finishing. I started a paper many years ago examining the prevalence of natural enemy pressure on pollinators, and patterns of occurrence of pollinator natural enemies in relation to plant attributes.

Anyway, Figshare seemed like the perfect place to put this. I licensed the materials under CC0, so feel free to do whatever you want with it. Check it out [here][figlink].

[figlink]: http://figshare.com/articles/Pollinator_niche_breadth_and_natural_enemies/803123

    @@ -68,292 +89,422 @@


I started an R package a while back, and a few people have shown interest, so I thought it was time to revisit the code. govdat is an interface to various APIs for government data: currently the Sunlight Labs APIs, and the New York Times Congress API. Returned objects from functions are simple lists. In future versions of govdat, I may change how data is returned. The following are examples (which is also the package vignette) of using the Sunlight Labs API. I will add examples of using the New York Times Congress API once their site is up again; I'm doing this on 2013-08-28, just after the takedown of their site.

I show just a bit of each data object returned for brevity. And yes, I realize this is not related at all to ecology.

You will need an API key to use both Sunlight Labs APIs and the New York Times APIs. Get your API key at Sunlight Labs [here](http://sunlightfoundation.com/api/) and NYT [here](http://developer.nytimes.com/docs/congress_api). You can pass in your key within each function or you can put the key in your .Rprofile file on your machine (which is read from the default R working directory) and the key will be read in automatically inside the function. I recommend the latter option.

Do let me know of bugs or feature requests over at the Github issues page [here](https://github.com/sckott/govdat/issues).
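For example, a line like the following in your .Rprofile (a sketch; replace the string with the key Sunlight Labs gives you) is what makes the `getOption("SunlightLabsKey")` calls below work:

{% highlight r %}
# in .Rprofile, read from the default R working directory at startup
options(SunlightLabsKey = "your-sunlightlabs-key")

# then inside a session (or a function) the key is available via
key <- getOption("SunlightLabsKey")
{% endhighlight %}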

    -
********************

#### Install govdat

{% highlight r %}
install.packages("devtools")
library(devtools)
install_github("govdat", "sckott")
{% endhighlight %}

********************

#### Load govdat

{% highlight r %}
library(govdat)
{% endhighlight %}

********************

#### Gets details (subcommittees + membership) for a committee by id.

{% highlight r %}
key <- getOption("SunlightLabsKey")
out <- sll_cg_getcommittees(id = "JSPR")
out$response$committee$members[[1]]$legislator[1:5]
{% endhighlight %}

{% highlight text %}
$website
[1] "http://www.alexander.senate.gov"

$fax
[1] "202-228-3398"

$govtrack_id
[1] "300002"

$firstname
[1] "Lamar"

$chamber
[1] "senate"
{% endhighlight %}

********************

#### Gets a list of all committees that a member serves on, including subcommittees.
    -

{% highlight r %}
out <- sll_cg_getcommitteesallleg(bioguide_id = "S000148")
out$response$committees[[1]]
{% endhighlight %}

{% highlight text %}
$committee
$committee$chamber
[1] "Senate"

$committee$id
[1] "SSRA"

$committee$name
[1] "Senate Committee on Rules and Administration"
{% endhighlight %}

********************

#### Get districts for a latitude/longitude.
    -

{% highlight r %}
out <- sll_cg_getdistrictlatlong(latitude = 35.778788, longitude = -78.787805)
out$response$districts
{% endhighlight %}

{% highlight text %}
[[1]]
[[1]]$district
[[1]]$district$state
[1] "NC"

[[1]]$district$number
[1] "2"
{% endhighlight %}

********************

#### Get districts that overlap for a certain zip code.
    -

{% highlight r %}
out <- sll_cg_getdistrictzip(zip = 27511)
out$response$districts
{% endhighlight %}

{% highlight text %}
[[1]]
[[1]]$district
[[1]]$district$state
[1] "NC"

[[1]]$district$number
[1] "2"



[[2]]
[[2]]$district
[[2]]$district$state
[1] "NC"

[[2]]$district$number
[1] "4"



[[3]]
[[3]]$district
[[3]]$district$state
[1] "NC"

[[3]]$district$number
[1] "13"
{% endhighlight %}

********************

#### Search congress people and senate members.
    -

{% highlight r %}
out <- sll_cg_getlegislatorsearch(name = "Reed")
out$response$results[[1]]$result$legislator[1:5]
{% endhighlight %}

{% highlight text %}
$website
[1] "http://www.reed.senate.gov"

$fax
[1] "202-224-4680"

$govtrack_id
[1] "300081"

$firstname
[1] "John"

$chamber
[1] "senate"
{% endhighlight %}

********************

#### Search congress people and senate members for a zip code.
{% highlight r %}
out <- sll_cg_legislatorsallforzip(zip = 77006)
library(plyr)
ldply(out$response$legislators, function(x) data.frame(x$legislator[c("firstname", 
    "lastname")]))
{% endhighlight %}

{% highlight text %}
  firstname    lastname
1    Sheila Jackson Lee
2       Ted        Cruz
3      John      Cornyn
4       Ted         Poe
{% endhighlight %}

********************

#### Find the popularity of a phrase over a period of time.

##### Get a list of how many times the phrase "united states" appears in the Congressional Record in each month between January and April, 2009:
    -

{% highlight r %}
sll_cw_timeseries(phrase = "united states", start_date = "2009-01-01", end_date = "2009-04-30", 
    granularity = "month")
{% endhighlight %}

{% highlight text %}
4 records returned
{% endhighlight %}

{% highlight text %}
  count      month
1  3805 2009-01-01
2  3512 2009-02-01
3  6018 2009-03-01
4  2967 2009-04-01
{% endhighlight %}

##### Plot data

{% highlight r %}
library(ggplot2)
dat <- sll_cw_timeseries(phrase = "climate change")
{% endhighlight %}
{% highlight text %}
1354 records returned
{% endhighlight %}

{% highlight r %}
ggplot(dat, aes(day, count)) + geom_line() + theme_grey(base_size = 20)
{% endhighlight %}

![center](/public/img/2013-08-28-govdat-vignette/sll_cw_timeseries2.png)

##### Plot more data

{% highlight r %}
dat_d <- sll_cw_timeseries(phrase = "climate change", party = "D")
{% endhighlight %}

{% highlight text %}
908 records returned
{% endhighlight %}

{% highlight r %}
dat_d$party <- rep("D", nrow(dat_d))
dat_r <- sll_cw_timeseries(phrase = "climate change", party = "R")
{% endhighlight %}

{% highlight text %}
623 records returned
{% endhighlight %}

{% highlight r %}
dat_r$party <- rep("R", nrow(dat_r))
dat_both <- rbind(dat_d, dat_r)
ggplot(dat_both, aes(day, count, colour = party)) + geom_line() + theme_grey(base_size = 20) + 
    scale_colour_manual(values = c("blue", "red"))
{% endhighlight %}

![center](/public/img/2013-08-28-govdat-vignette/sll_cw_timeseries3.png)
********************

#### Search OpenStates bills.

{% highlight r %}
out <- sll_os_billsearch(terms = "agriculture", state = "tx", chamber = "upper")
lapply(out, "[[", "title")[100:110]
{% endhighlight %}

{% highlight text %}
[[1]]
[1] "Relating to the sale by the Brazos River Authority of certain property at Possum Kingdom Lake."

[[2]]
[1] "Proposing a constitutional amendment providing immediate additional revenue for the state budget by creating the Texas Gaming Commission, and authorizing and regulating the operation of casino games and slot machines by a limited number of licensed operators and certain Indian tribes."

[[3]]
[1] "Relating to production requirements for holders of winery permits."

[[4]]
[1] "Relating to the use of human remains in the training of search and rescue animals."

[[5]]
[1] "Relating to end-of-course assessment instruments administered to public high school students and other measures of secondary-level performance."

[[6]]
[1] "Relating to public high school graduation, including curriculum and assessment requirements for graduation and funding in support of certain curriculum authorized for graduation."

[[7]]
[1] "Relating to certain residential and other structures and mitigation of loss to those structures resulting from natural catastrophes; providing a criminal penalty."

[[8]]
[1] "Recognizing March 28, 2013, as Texas Water Conservation Day at the State Capitol."

[[9]]
[1] "Recognizing March 26, 2013, as Lubbock Day at the State Capitol."

[[10]]
[1] "In memory of Steve Jones."

[[11]]
[1] "Relating to the regulation of dangerous wild animals."
{% endhighlight %}

********************

#### Search Legislators on OpenStates.

{% highlight r %}
out <- sll_os_legislatorsearch(state = "tx", party = "democratic", active = TRUE)
out[[1]][1:5]
{% endhighlight %}

{% highlight text %}
$last_name
[1] "Naishtat"

$updated_at
[1] "2013-08-29 03:03:22"

$nimsp_candidate_id
[1] "112047"

$full_name
[1] "Elliott Naishtat"

$`+district_address`
[1] " P.O. Box 2910\nAustin, TX 78768\n(512) 463-0668"
{% endhighlight %}

********************

#### Search for entities - that is, politicians, individuals, or organizations with the given name

{% highlight r %}
out <- sll_ts_aggregatesearch("Nancy Pelosi")
out <- lapply(out, function(x) {
    x[sapply(x, is.null)] <- "none"
    x
})
ldply(out, data.frame)
{% endhighlight %}

{% highlight text %}
                       name count_given firm_income count_lobbied          seat
1          Nancy Pelosi (D)           0           0             0 federal:house
2 Nancy Pelosi for Congress           7           0             0          none
  total_received state lobbying_firm count_received party total_given         type
1       14173534    CA          none          10054     D           0   politician
2              0  none          <NA>              0  none        7250 organization
                                 id non_firm_spending is_superpac
1 85ab2e74589a414495d18cc7a9233981                 0        none
2 afb432ec90454c8a83a3113061e7be27                 0        <NA>
{% endhighlight %}

********************

#### Return the top contributing organizations, ranked by total dollars given. An organization's giving is broken down into money given directly (by the organization's PAC) versus money given by individuals employed by or associated with the organization.
    -

{% highlight r %}
out <- sll_ts_aggregatetopcontribs(id = "85ab2e74589a414495d18cc7a9233981")
ldply(out, data.frame)
{% endhighlight %}

{% highlight text %}
   employee_amount total_amount total_count                                     name
1         64000.00    101300.00          79                         Akin, Gump et al
2          3500.00     90000.00          29 American Fedn of St/Cnty/Munic Employees
3                0     86600.00          48                National Assn of Realtors

@@ -374,7 +525,9 @@

7 36 0 390767dc6b4b491ca775b1bdf8a36eea 76000.00
8 18 0 b53b4ad137d743a996f4d7467700fc88 72500.00
9 21 0 425be85642b24cc2bc3d8a0bb3c7bc92 72500.00
10 20 11 793070ae7f5e42c2a76a58663a588f3d 64000.00
{% endhighlight %}
    @@ -387,177 +540,29 @@

    -

## ScienceOnline Climate

I recently attended ScienceOnline Climate, a conference in Washington, D.C. at AAAS offices. You may have heard of the ScienceOnline annual meeting in North Carolina - this was one of their topical meetings focused on Climate Change. Another one is coming up in October, ScienceOnline Oceans. Search Twitter for \#scioClimate (or the entire list of hashtags [here][tags]) for tweets from the conference.

One of the sessions I attended was focused on how to democratize climate change knowledge, moderated by a fellow from the Union of Concerned Scientists. Search Twitter for \#sciodemocracy to see the conversation from that session. There was a lot of very interesting discussion.

## Can we reach the public with phenology data?

During the \#sciodemocracy session, I had a thought but couldn't articulate it at the time. So here goes. People that are not involved in climate change discussions may not think about climate change in the framework of changing sea level, melting ice, and altered severity of extreme events. However, many people observe birds, butterflies, and trees outside their apartment windows, cars/trains/buses, or on walks or hikes. When you live in one place for many years, changes in the timing of when birds, butterflies, and trees do certain things are easily noticed. Many of us, including myself, don't necessarily record these changes, but some do! In fact, there are many web sites with databases of observations of birds, butterflies, and more that anyone, not just scientists, can submit observations to. Some examples are the [USA National Phenology Network][usnpn] and [iNaturalist][inat]. And of course there are other databases that are focused on observations of organisms collected by scientists, like [GBIF][gbif] and [VertNet][vertnet].

So what? What about it?

When enough of these observations are collected on any one species in one location (e.g., let's say we have 1000 observations of a species in Seattle over 20 years) we can simply ask how the first date of record of the species in Seattle has changed through time (there's a small sketch of that calculation at the end of this post). If there is a change in timing of first appearance in the spring through the years, we can hypothesize that this may be due to climate change, and look at the data to see if there is a correlation, etc.

Non-scientists along with scientists are collecting vast amounts of data on observations of species. This data can be used to make people think about climate change. That is, why don't we not only facilitate the public's ability to collect data, but also to analyze the data - to do their own science, ask their own questions. In this way, people can link a bird appearing for the first time in spring a bit later than the previous year, or a tree flowering a bit early, to variables associated with climate change, like temperature, precipitation, etc.

Empowering the general public to do their own science may bring the vague notion of climate change into stark relief - thereby motivating some to take action with their elected representatives, or to at least get curious to find out more.

[tags]: https://gist.github.com/sckott/6213308
[usnpn]: https://www.usanpn.org/
[inat]: http://www.inaturalist.org/
[gbif]: http://www.gbif.org/
[vertnet]: http://vertnet.org/index.php
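And the first-appearance idea above is only a few lines of R. This is a hypothetical sketch - `obs`, its columns, and the species/place are made up for illustration, not tied to any particular database:

{% highlight r %}
# obs: one row per observation of a single species in a single place,
# with a Date column named 'date' (hypothetical data)
obs$year <- as.integer(format(obs$date, "%Y"))
obs$doy  <- as.integer(format(obs$date, "%j"))  # day of year

# first recorded appearance in each year
first_obs <- aggregate(doy ~ year, data = obs, FUN = min)

# simple trend of first appearance against year; a negative slope
# hints at earlier first appearances over time
summary(lm(doy ~ year, data = first_obs))
{% endhighlight %}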
    diff --git a/_site/page19/index.html b/_site/page19/index.html index e892b22b2f..d688e7208f 100644 --- a/_site/page19/index.html +++ b/_site/page19/index.html @@ -61,87 +61,249 @@

    Recology

Working with climate data from the web in R

I recently attended [ScienceOnline Climate][sciocweb], a conference in Washington, D.C. at AAAS. You may have heard of the [ScienceOnline annual meeting in North Carolina][sciox] - this was one of their topical meetings focused on Climate Change. I moderated a session on [working with data from the web in R][sciordata], focusing on climate data. Search Twitter for \#scioClimate for tweets from the conference, and \#sciordata for tweets from the session I ran. The following is an abbreviated demo of what I did in the workshop showing some of what you can do with climate data in R using our packages.

Before digging in, why would you want to get climate data programmatically vs. via pushing buttons in a browser? Learning a programming language can take some time - we all already know how to use browsers. So why?! First, getting data programmatically, especially in R (or Python), allows you to then easily do other stuff, like manipulate data, visualize, and analyze data. Second, if you do your work programmatically, **you** and *others* can reproduce, and extend, the work you did with little extra effort. Third, programmatically getting data makes tasks that are repetitive and slow, fast and easy - you can't easily automate button clicks in a browser. Fourth, you can combine code with writing to make your entire workflow reproducible, whether it's notes, a blog post, or even a research article.

## Interactive visualizations in R

Let's start off with something shiny. The majority of time I make static visualizations, which are great for me to look at during analyses, and for publications of research findings in PDFs. However, static visualizations don't take advantage of the interactive nature of the web. Ramnath Vaidyanathan has developed an R package, [rCharts][rcharts], to generate dynamic Javascript visualizations directly from R that can be used interactively in a browser. Here is an example visualizing a dataset that comes with R.

{% highlight r %}
library(devtools)
install_github("rCharts", "ramnathv")
library(rCharts)

# Load a data set
hair_eye_male <- subset(as.data.frame(HairEyeColor), Sex == "Male")

# Make a javascript plot object
n1 <- nPlot(Freq ~ Hair, group = "Eye", data = hair_eye_male, type = "multiBarChart")

# Visualize
n1$show(cdn = TRUE)
{% endhighlight %}

Check out the output [here][rchartsout]. If you like you can take the source code from the visualization (right click and select *View Page Source*) and put it in your html files, and you're good to go (as long as you have dependencies, etc.) - quicker than learning [d3][d3] and company from scratch, eh. This is a super simple example, but you can imagine the possibilities.

## The data itself

### First, install some packages - these are all just on Github, so you need to have devtools installed

{% highlight r %}
library(devtools)
install_github("govdat", "sckott")
install_github("rnoaa", "ropensci")
install_github("rWBclimate", "ropensci")
install_github("rnpn", "ropensci")
{% endhighlight %}

### Politicians talk - Sunlight Foundation listens

#### Look at mentions of the phrase "climate change" in congress, using the govdat package

{% highlight r %}
library(govdat)
library(ggplot2)

# Get mentions of climate change from Democrats
dat_d <- sll_cw_timeseries(phrase = "climate change", party = "D")

# Add a column that says this is data from democrats
dat_d$party <- rep("D", nrow(dat_d))

# Get mentions of climate change from Republicans
dat_r <- sll_cw_timeseries(phrase = "climate change", party = "R")

# Add a column that says this is data from republicans
dat_r$party <- rep("R", nrow(dat_r))

# Put two tables together
dat_both <- rbind(dat_d, dat_r)

# Plot data
ggplot(dat_both, aes(day, count, colour = party)) + theme_grey(base_size = 20) + 
    geom_line() + scale_colour_manual(values = c("blue", "red"))
{% endhighlight %}

![center](/public/img/2013-08-17-sciordata/govdat.png)

### NOAA climate data, using the rnoaa package

#### Map sea ice for 12 years, for April only, for the North pole

{% highlight r %}
library(rnoaa)
library(scales)
library(ggplot2)
library(doMC)
library(plyr)

# Get URLs for data
urls <- seaiceeurls(mo = "Apr", pole = "N")[1:12]

# Download sea ice data
registerDoMC(cores = 4)
out <- llply(urls, noaa_seaice, storepath = "~/seaicedata", .parallel = TRUE)

# Name elements of list
names(out) <- seq(1979, 1990, 1)

# Make a data.frame
df <- ldply(out)

# Plot data
ggplot(df, aes(long, lat, group = group)) + geom_polygon(fill = "steelblue") + 
    theme_ice() + facet_wrap(~.id)
{% endhighlight %}

![center](/public/img/2013-08-17-sciordata/seaice2.png)

### World Bank climate data, using the rWBclimate package

#### Plotting annual data for different countries

Data can be extracted from countries or basins submitted as vectors. Here we will plot the expected temperature anomaly for each 20 year period over a baseline control period of 1961-2000. These countries chosen span the north to south pole. It's clear from the plot that the northern most countries (US and Canada) have the biggest anomaly, and Belize, the most equatorial country, has the smallest anomaly.

{% highlight r %}
library(rWBclimate)

# Search for data
country.list <- c("CAN", "USA", "MEX", "BLZ", "ARG")
country.dat <- get_model_temp(country.list, "annualanom", 2010, 2100)

# Subset data to one specific model
country.dat.bcc <- country.dat[country.dat$gcm == "bccr_bcm2_0", ]

# Exclude A2 scenario
country.dat.bcc <- subset(country.dat.bcc, country.dat.bcc$scenario != "a2")

# Plot data
ggplot(country.dat.bcc, aes(x = fromYear, y = data, group = locator, colour = locator)) + 
    geom_point() + geom_path() + ylab("Temperature anomaly over baseline") + 
    theme_bw(base_size = 20)
{% endhighlight %}

![center](/public/img/2013-08-17-sciordata/unnamed-chunk-1.png)

### Phenology data from the USA National Phenology Network, using rnpn

{% highlight r %}
library(rnpn)

# Lookup names
temp <- lookup_names(name = "bird", type = "common")
comnames <- temp[temp$species_id %in% c(357, 359, 1108), "common_name"]

# Get some data
out <- getobsspbyday(speciesid = c(357, 359, 1108), startdate = "2010-04-01", 
    enddate = "2013-09-31")
names(out) <- comnames
df <- ldply(out)
df$date <- as.Date(df$date)

# Visualize data
library(ggplot2)
ggplot(df, aes(date, count)) + geom_line() + theme_grey(base_size = 20) + facet_grid(.id ~ 
    .)
{% endhighlight %}

![center](/public/img/2013-08-17-sciordata/rnpn.png)

### Feedback and new climate data sources

Do use the above packages ([govdat][govdat], [rnoaa][rnoaa], [rWBclimate][rWBclimate], and [rnpn][rnpn]) to get climate data, and get in touch with bug reports, and feature requests.

Surely there are other sources of climate data out there that you want to use in R, right? Let us know what else you want to use. Better yet, if you can sling some R code, start writing your own package to interact with a source of climate data on the web - we can lend a hand.

[sciocweb]: http://scioclimate.wikispaces.com
[sciox]: https://twitter.com/#sciox
[rchartsout]: http://recology.info/vis/nvd3_eg.html
[rcharts]: https://github.com/ramnathv/rCharts
[sciordata]: http://scioclimate.wikispaces.com/3W.+Working+With+Science+Data+From+Around+The+Web
[d3]: http://d3js.org/
[govdat]: https://github.com/sckott/govdat
[rnoaa]: https://github.com/ropensci/rnoaa
[rWBclimate]: https://github.com/ropensci/rWBclimate
[rnpn]: https://github.com/ropensci/rnpn

R ecology workshop

After [my presentation yesterday][last] to a group of grad students on R resources, I did a presentation today on intro to R data manipulation, visualizations, and analyses/visualizations of bipartite networks and community level analyses (diversity, rarefaction, ordination, etc.). As I said [yesterday][last] I've been playing with two ways to make reproducible presentations in R: [RStudio's presentations][rstudio] built in to RStudio IDE, and [Slidify][slidify]. Yesterday I went with RStudio's product - today I used Slidify. See the Markdown file for the presentation [here](https://github.com/sckott/posterstalks/blob/gh-pages/sfu/resources/r_resources.Rpres).

Check out the presentation slides [here](https://bitly.com/sfuworkshop), and if you want, fork [the code on Github](http://bit.ly/1bKVX2O), change it, submit changes back to me, etc. (click on the image to go to slides)

How I actually ran the 2 hr workshop was to present a few slides, then live demo writing the code out with students following along, with a number of times where they do something on their own.

[last]: http://sckott.github.io/2013/07/r-resources/
[rstudio]: http://www.rstudio.com/ide/docs/presentations/overview
[slidify]: http://slidify.org/

R resources

I'm doing a presentation today to grad students on R resources. I have been writing HTML presentations recently, but some great tools are now available to convert text that is easy to read and write to presentations.

+ RStudio has something called `R presentations`, that is basically Markdown. This tool is built in to RStudio. See some docs [here](http://www.rstudio.com/ide/docs/presentations/overview). A cool feature of RStudio's presentations is that the preview of the presentation live updates on each save - nice
+ Another option is the slidify package, made by [Ramnath Vaidyanathan](https://github.com/ramnathv). The canonical url for slidify is [here](http://slidify.org/). Slidify gives you more options and flexibility than RStudio presentations.

For this presentation I went with RStudio's product. See the Markdown file for the presentation [here](https://github.com/sckott/posterstalks/blob/gh-pages/sfu/resources/r_resources.Rpres).

Check out the presentation slides here, and if you want, fork it on Github, change it, submit changes back to me, etc. (click to go to slides)

Beyond academia

As ecologists, we often start graduate school worshiping the ivory tower of academia with its freedom to pursue important ecological questions. However, studies have shown that most of us do not end up in academia. Greater numbers of ecology graduates are leaving the ivory tower for non-academic career paths. But for many graduates, moving from an academic environment to a non-academic job may be difficult. In graduate school we are trained to work in a particular way, often with loose deadlines and unlimited intellectual freedom (within reason of course). The culture and expectations of the non-academic world may be quite different. What are the skills that you need in a government job, or in science journalism? How do you market yourself for a non-academic position? This is a timely topic because funding to academic ecologists is being cut, leaving fewer opportunities in the academic arena. In fact, an ESA Student Section survey found that an ESA 2013 session on non-academic career paths in ecology was the topic of greatest interest.

Sandra Chung and I organized an ESA lunchtime session on Tuesday the 6th with panelists from an array of non-academic careers to offer advice and share their experiences. Each panelist will speak briefly, introducing themselves and a bit about what they do. About half of the time will be reserved for an open discussion in which you all attending the session help decide what to talk about.

You can find the description of the session at the ESA site here.

The details:

* When: Tuesday, August 6, 2013: 11:30 AM-1:15 PM
* Where: 101B, Minneapolis Convention Center
* Who (the panelists):
  * Virginia Gewin: Independent science journalist (w/ work in Science/Nature/etc.)
  * Liz Neeley: Science communication/journalism at COMPASS (a non-profit org.)
  * Joe Simonis: Research scientist at the Lincoln Park Zoo (a non-profit org.)
  * Ted Hart: Soon to be statistician at NEON
  * Lael Goodman: Analyst w/ the Union of Concerned Scientists

Get involved

As a placeholder on the web for things related to Beyond Academia, and a place to find out more about the session at ESA, we started a wiki. Check it out here. Please do visit the wiki, and contribute your ideas for topics to discuss. In addition, we have a Resources page on the wiki here to collect resources related to moving beyond academia.
    diff --git a/_site/page2/index.html b/_site/page2/index.html index c5b794bfb3..9e92e3d0dc 100644 --- a/_site/page2/index.html +++ b/_site/page2/index.html @@ -61,340 +61,414 @@

    Recology

- Metrics for open source projects
+ noaa - Integrated Surface Database data

    - + + + I've recently made some improvements to the functions that work with ISD +(Integrated Surface Database) data. + +__isd data__ + +* The `isd()` function now caches more intelligently. We now cache using +`.rds` files via `saveRDS`/`readRDS`, whereas we used to use `.csv` files, +which take up much more disk space, and we have to worry about not changing +data formats on reading data back into an R session. This has the downside +that you can't just go directly to open up a cached file in your favorite +spreadsheet viewer, but you can do that manually after reading in to R. +* In addition, `isd()` now has a function `cleanup`, if `TRUE` after +downloading the data file from NOAA's ftp server and processing, we delete +the file. That's fine since we have the cached processed file. But you +can choose not to cleanup the original data files. +* Data processing in `isd()` is improved as well. We convert key variables +to appropriate classes to be more useful. + +__isd stations__ + +* In `isd_stations()`, there's now a cached version of the station data in +the package, or you can get optionally get fresh station data from NOAA's +FTP server. +* There's a new function `isd_stations_search()` that uses the station data +to allow you to search for stations via either: + * A bounding box + * Radius froma point + +## Install + +For examples below, you'll need the development version: + + +```r +devtools::install_github("ropensci/rnoaa") +``` + +Load `rnoaa` + + +```r +library("rnoaa") +``` + +## ISD stations + +### Get stations + +There's a cached version of the station data in the package, or you can get fresh +station data from NOAA's FTP server. + + +```r +stations <- isd_stations() +head(stations) +#> usaf wban station_name ctry state icao lat lon elev_m begin end +#> 1 7005 99999 CWOS 07005 NA NA NA 20120127 20120127 +#> 2 7011 99999 CWOS 07011 NA NA NA 20111025 20121129 +#> 3 7018 99999 WXPOD 7018 0 0 7018 20110309 20130730 +#> 4 7025 99999 CWOS 07025 NA NA NA 20120127 20120127 +#> 5 7026 99999 WXPOD 7026 AF 0 0 7026 20120713 20141120 +#> 6 7034 99999 CWOS 07034 NA NA NA 20121024 20121106 +``` + +### Filter and visualize stations + +In addition to getting the entire station data.frame, you can also search for stations, +either with a bounding box or within a radius from a point. First, the bounding box -

    Measuring use of open source software isn't always straightforward. The problem is especially acute for software targeted largely at academia, where usage is not measured just by software downloads, but also by citations.

    -

    Citations are a well-known pain point because the citation graph is privately held by iron doors (e.g., Scopus, Google Scholar). New ventures aim to open up citation data, but of course it's an immense amount of work, and so does not come quickly.

    +```r +bbox <- c(-125.0, 38.4, -121.8, 40.9) +out <- isd_stations_search(bbox = bbox) +head(out) +#> usaf wban station_name ctry state icao +#> 1 720193 99999 LONNIE POOL FLD / WEAVERVILLE AIRPORT US CA KO54 +#> 2 724834 99999 POINT CABRILLO US CA +#> 3 724953 99999 RIO NIDO US CA +#> 4 724957 23213 SONOMA COUNTY AIRPORT US CA KSTS +#> 5 724957 99999 C M SCHULZ SONOMA CO US CA KSTS +#> 6 724970 99999 CHICO CALIFORNIA MAP US CA CIC +#> elev_m begin end lon lat +#> 1 716.0 20101030 20150831 -122.922 40.747 +#> 2 20.0 19810906 19871007 -123.820 39.350 +#> 3 -999.0 19891111 19900303 -122.917 38.517 +#> 4 34.8 20000101 20150831 -122.810 38.504 +#> 5 38.0 19430404 19991231 -122.817 38.517 +#> 6 69.0 19420506 19760305 -121.850 39.783 +``` -

    The following is a laundry list of metrics on software of which I am aware, and some of which I use in our rOpenSci twice monthly updates.

    +Where is the bounding box? (you'll need [lawn](https://cran.rstudio.com/web/packages/lawn/), or you can visualize some other way) -

    I primarily develop software for the R language, so some of the metrics are specific to R, but many are not. In addition, we (rOpenSci) don't develop web apps, which may bring in an additional set of metrics not covered below.

    -

    I organize by source instead of type of data because some sources give multiple kinds of data - I note what kinds of data they give with labels.

    +```r +library("lawn") +lawn::lawn_bbox_polygon(bbox) %>% view +``` -

    CRAN downloads

    +![plot1](/public/img/2015-10-21-noaa-isd/bbox_area.png) -

    downloads

    +Visualize station subset - yep, looks right -
      -
    • Link: https://github.com/metacran/cranlogs.app
    • -
    • This is a REST API for CRAN downloads from the RStudio CRAN CDN. Note, however, that the RStudio CDN is only one of many - there are other mirrors users can install packages from, which are not included in this count. Still, a significant portion of downloads probably comes from the RStudio CDN (see the short example after this list).
    • -
    • Other programming languages have similar support, e.g., Ruby and Node.
    • -
    -
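For a rough sense of what these download counts look like in practice, here's a minimal sketch using the cranlogs R package, which talks to the same download-count API (the package queried is just an example):

```r
# install.packages("cranlogs")
library("cranlogs")

# downloads from the RStudio CRAN mirror over the last month
cran_downloads(packages = "taxize", when = "last-month")

# or over a specific date range
cran_downloads(packages = "taxize", from = "2015-01-01", to = "2015-06-30")
```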

    Lagotto

    +```r +library("leaflet") +leaflet(data = out) %>% + addTiles() %>% + addCircles() +``` -

    citations github social-media

    +![plot1](/public/img/2015-10-21-noaa-isd/bbox_result.png) - +Next, search with a lat/lon coordinate, with a radius. That is, we search for stations +within X km from the coordinate. -

    Depsy

    -

    citations github

    +```r +out <- isd_stations_search(lat = 38.4, lon = -123, radius = 250) +head(out) +#> usaf wban station_name ctry state icao elev_m begin +#> 1 690070 93217 FRITZSCHE AAF US CA KOAR 43.0 19600404 +#> 2 720267 23224 AUBURN MUNICIPAL AIRPORT US CA KAUN 466.7 20060101 +#> 3 720267 99999 AUBURN MUNICIPAL US CA KAUN 468.0 20040525 +#> 4 720406 99999 GNOSS FIELD AIRPORT US CA KDVO 0.6 20071114 +#> 5 720576 174 UNIVERSITY AIRPORT US CA KEDU 21.0 20130101 +#> 6 720576 99999 DAVIS US CA KEDU 21.0 20080721 +#> end lon lat +#> 1 19930831 -121.767 36.683 +#> 2 20150831 -121.082 38.955 +#> 3 20051231 -121.082 38.955 +#> 4 20150831 -122.550 38.150 +#> 5 20150831 -121.783 38.533 +#> 6 20121231 -121.783 38.533 +``` -
      -
    • Link: http://depsy.org
    • -
    • This is a nascent venture by the ImpactStory team that seeks to uncover the impact of research software. As far as I can tell, they'll collect usage via software downloads and citations in the literature.
    • -
    +Again, compare search area to stations found -

    Web Site Analytics

    +_search area_ -

    page-views

    -
      -
    • If you happen to have a website for your project, collecting analytics is a way to gauge views of the landing page, and any help/tutorial pages you may have. A good, easy way to do this is to deploy a basic site on the gh-pages branch of your GitHub repo, and use the easily integrated Google Analytics.
    • -
    • Whatever analytics you use, in my experience this mostly brings up links from Google searches and blog posts that may mention your project.
    • -
    • Google Analytics beacon (for README views): https://github.com/igrigorik/ga-beacon. I haven't tried this yet, but seems promising.
    • -
    +```r +pt <- lawn::lawn_point(c(-123, 38.4)) +lawn::lawn_buffer(pt, dist = 250) %>% view +``` -

    Automated tracking: SSNMP

    +![plot1](/public/img/2015-10-21-noaa-isd/circle_radius.png) -

    citations github

    +_stations found_ -
      -
    • Link: http://scisoft-net-map.isri.cmu.edu
    • -
    • Scientific Software Network Map Project
    • -
    • This is a cool NSF funded project by Chris Bogart that tracks software usage via GitHub and citations in literature.
    • -
    -

    Google Scholar

    +```r +leaflet(data = out) %>% + addTiles() %>% + addCircles() +``` -

    citations

    +![plot1](/public/img/2015-10-21-noaa-isd/lastplot.png) -
      -
    • Link: https://scholar.google.com/
    • -
    • Searching Google Scholar for software citations manually is fine at a small scale, but at a larger scale scraping is best. However, you're not legally supposed to do this, and Google will shut you down.
    • -
    • Could try using g-scholar alerts as well, especially if new citations of your work are infrequent.
    • -
    • If you have institutional access to Scopus/Web of Science, you could search those, but I don't push this as an option since it's available to so few.
    • -
    -

    GitHub

    -

    github
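As a rough sketch of what's available here, basic repo-level metrics (stars, forks, open issues, watchers) can be pulled from the GitHub API; the repo below is just an example, and unauthenticated requests are rate-limited:

```r
library("httr")
library("jsonlite")

# fetch repo metadata for a single repository
res <- GET("https://api.github.com/repos/ropensci/rnoaa")
stop_for_status(res)
dat <- fromJSON(content(res, as = "text"))

# a few of the metrics returned
dat[c("stargazers_count", "forks_count", "open_issues_count", "subscribers_count")]
```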

    +## ISD data - +### Get ISD data -

    Other

    +Here, I get data for four stations. -
      -
    • Support forums: Whether you use UserVoice, Discourse, Google Groups, Gitter, etc., depending on your viewpoint, these interactions could be counted as metrics of software usage.
    • -
    • Emails: I personally get a lot of emails asking for help with software I maintain. I imagine this is true for most software developers. Counting these could be another metric of software usage, although I never have counted mine.
    • -
    • Social media: See Lagotto above, which tracks some social media outlets.
    • -
    • Code coverage: There are many options now for code coverage, integrated with each Travis-CI build. A good option is CodeCov. CodeCov gives percentage test coverage, which one could use as one measure of code quality (see the short sketch after this list).
    • -
    • Reviews: There isn't a lot of code review going on that I'm aware of. Even if there was, I suppose this would just be a logical TRUE/FALSE.
    • -
    • Cash money y'all: Grants/consulting income/etc. could be counted as a metric.
    • -
    • Users: If you require users to create an account or similar before getting your software, you have a sense of number of users and perhaps their demographics.
    • -
    -
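For the code coverage point above, a minimal local sketch with the covr package might look like this (the path is a placeholder; covr::codecov() can then send results to CodeCov from a CI build):

```r
library("covr")

# compute test coverage for a package checkout
cov <- package_coverage("/path/to/your/package")

# overall percentage, one possible quality metric
percent_coverage(cov)
```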

    Promising

    +```r +res1 <- isd(usaf="011690", wban="99999", year=1993) +res2 <- isd(usaf="172007", wban="99999", year=2015) +res3 <- isd(usaf="702700", wban="00489", year=2015) +res4 <- isd(usaf="109711", wban=99999, year=1970) +``` -

    Some software metrics things on the horizon that look interesting:

    +Then, combine data, with `rnoaa:::rbind.isd()` - -

    Missed?

    +```r +res_all <- rbind(res1, res2, res3, res4) +``` -

    I'm sure I missed things. Let me know.

    +Add date time + + +```r +library("lubridate") +res_all$date_time <- ymd_hm( + sprintf("%s %s", as.character(res_all$date), res_all$time) +) +``` + +Remove 999's (NOAA's way to indicate missing/no data) + + +```r +library("dplyr") +res_all <- res_all %>% filter(temperature < 900) +``` + +### Visualize ISD data + + +```r +library("ggplot2") +ggplot(res_all, aes(date_time, temperature)) + + geom_line() + + facet_wrap(~usaf_station, scales = "free_x") +``` + +![img](/public/img/2015-10-21-noaa-isd/unnamed-chunk-12-1.png)

    - - analogsea - an R client for the Digital Ocean API + + Metrics for open source projects

    - + + + Measuring use of open source software isn't always straightforward. The problem is especially acute for software targeted largely at academia, where usage is not measured just by software downloads, but also by citations. + +Citations are a well-known pain point because the citation graph is privately held by iron doors (e.g., [Scopus][scopus], [Google Scholar][schol]). New ventures aim to open up citation data, but of course it's an immense amount of work, and so does not come quickly. + +The following is a laundry list of metrics on software of which I am aware, and some of which I use in our [rOpenSci twice monthly updates][news]. + +I primarily develop software for the R language, so some of the metrics are specific to R, but many are not. In addition, we (rOpenSci) don't develop web apps, which may bring in an additional set of metrics not covered below. + +I organize by source instead of type of data because some sources give multiple kinds of data - I note what kinds of data they give with labels. + +## CRAN downloads -

    analogsea is now on CRAN. We started developing the pkg back in May 2014, but just -now getting the first version on CRAN. It's a collaboration with Hadley and Winston Chang.

    +downloads -

    Most of analogsea package is for interacting with the Digital Ocean API, including:

    +- Link: [https://github.com/metacran/cranlogs.app](https://github.com/metacran/cranlogs.app) +- This is a REST API for CRAN downloads from the RStudio CRAN CDN. Note however, that the RStudio CDN is only one of many - there are other mirrors users can insall packages from, and are not included in this count. However, a significant portion of downloads probably come from the RStudio CDN. +- Other programming languages have similar support, e.g., [Ruby](http://guides.rubygems.org/rubygems-org-api/) and [Node](https://github.com/npm/download-counts). -
      -
    • Manage domains
    • -
    • Manage ssh keys
    • -
    • Get actions
    • -
    • Manage images
    • -
    • Manage droplets (servers)
    • -
    +## Lagotto -

    A number of convenience functions are included for doing tasks (e.g., resizing -a droplet) that aren't supported by Digital Ocean's API out of the box (i.e., -there's no API route for it).
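As a rough sketch of what basic droplet management looks like from R (treat this as illustrative; droplet_resize() in particular is assumed here to be one of those convenience helpers - see the package README for the definitive interface):

```r
library("analogsea")

# assumes your Digital Ocean personal access token is configured
droplets()                                  # list droplets on your account

d <- droplet_create(name = "test-droplet")  # spin up a droplet with default settings
d <- droplet_resize(d, size = "2gb")        # convenience wrapper spanning several API calls
droplet_delete(d)                           # destroy it when you're done
```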

    +citations github social-media -

    In addition to wrapping their API routes, we provide other functionality, e.g.:

    +- Link: [http://software.lagotto.io/works](http://software.lagotto.io/works) +- Lagotto is a Rails application, developed by [Martin Fenner](https://github.com/mfenner), originally designed to collect and provide article level metrics for scientific publications at Public Library of Science. It is now used by many publishers, and there are installations of Lagotto targeting [datasets](http://mdc.lagotto.io/) and [software](http://software.lagotto.io/works). +- Discussion forum: [http://discuss.lagotto.io/](http://discuss.lagotto.io/) -
      -
    • execute shell commands on a droplet (server)
    • -
    • execute R commands on a droplet
    • -
    • install R
    • -
    • install RStudio server
    • -
    • install Shiny server
    • -
    +## Depsy -

    Other functionality we're working on, not yet available:

    +citations github -
      -
    • install OpenCPU
    • -
    • use packrat to move projects from local to server, and vice versa
    • -
    +- Link: [http://depsy.org](http://depsy.org) +- This is a nascent venture by the [ImpactStory team](https://impactstory.org/about) that seeks to uncover the impact of research software. As far as I can tell, they'll collect usage via software downloads and citations in the literature. -

    See also: two previous blog posts on this package http://recology.info/2014/05/analogsea/ and http://recology.info/2014/06/analogsea-v01/

    +## Web Site Analytics -

    Install

    +page-views -

    Binaries are not yet on CRAN, but you can install from source.

    -
    # install.packages("analogsea") # when binaries available
    -install.packages("analogsea", repos = "https://cran.r-project.org", type = "source")
    -
    -

    Or install development version from GitHub

    -
    devtools::install_github("sckott/analogsea")
    -
    -

    Load analogsea

    -
    library("analogsea")
    -
    -

    Etc.

    +- If you happen to have a website for your project, collecting analytics is a way to gauge views of the landing page, and any help/tutorial pages you may have. A good easy way to do this is a deploy a basic site on your `gh-pages` branch of your GitHub repo, and use the easily integrated Google Analytics. +- Whatever analytics you use, in my experience this mostly brings up links from google searches and blog posts that may mention your project +- Google Analytics beacon (for README views): [https://github.com/igrigorik/ga-beacon](https://github.com/igrigorik/ga-beacon). I haven't tried this yet, but seems promising. -

    As this post is mostly to announce that this pkg is on CRAN now, I won't go through examples, but instead point you to the package README and vignette in which we cover -creating a Digital Ocean account, authenticating, and have many examples.

    +## Auomated tracking: SSNMP -

    Feedback

    +citations github -

    Let us know what you think. We'd love to hear about any problems, use cases, feature requests.

    +- Link: [http://scisoft-net-map.isri.cmu.edu](http://scisoft-net-map.isri.cmu.edu) +- Scientific Software Network Map Project +- This is a cool NSF funded project by Chris Bogart that tracks software usage via GitHub and citations in literature. + +## Google Scholar + +citations + +- Link: [https://scholar.google.com/](https://scholar.google.com/) +- Searching Google Scholar for software citations manually is fine at a small scale, but at a larger scale scraping is best. However, you're not legally supposed to do this, and Google will shut you down. +- Could try using g-scholar alerts as well, especially if new citations of your work are infrequent. +- If you have institutional access to Scopus/Web of Science, you could search those, but I don't push this as an option since it's available to so few. + +## GitHub + +github + +- Links: [https://developer.github.com/v3/](https://developer.github.com/v3/) +- I keep a list of rOpenSci uses found in GitHub repos at [https://discuss.ropensci.org/t/use-of-some-ropensci-packages-on-github/137](https://discuss.ropensci.org/t/use-of-some-ropensci-packages-on-github/137) +- GitHub does collect traffic data on each repo (clones, downloads, page views), but they are not exposed in the API. I've bugged them a bit about this - hopefully we'll be able to get that dat in their API soon. +- Bitbucket/Gitlab - don't use them, but I assume they also provide some metrics via their APIs + +## Other + +- Support forums: Whether you use UserVoice, Discourse, Google Groups, Gitter, etc., depending on your viewpoint, these interactions could be counted as metrics of software usage. +- Emails: I personally get a lot of emails asking for help with software I maintain. I imagine this is true for most software developers. Counting these could be another metric of software usage, although I never have counted mine. +- Social media: See Lagotto above, which tracks some social media outlets. +- Code coverage: There are many options now for code coverage, integrated with each Travis-CI build. A good option is [CodeCov](https://codecov.io). CodeCov gives percentage test coverage, which one could use as one measure of code quality. +- Reviews: There isn't a lot of code review going on that I'm aware of. Even if there was, I suppose this would just be a logical TRUE/FALSE. +- Cash money y'all: Grants/consulting income/etc. could be counted as a metric. +- Users: If you require users to create an account or similar before getting your software, you have a sense of number of users and perhaps their demographics. + +## Promising + +Some software metrics things on the horizon that look interesting: + +* [Software Attribution for Geoscience Applications][saga] (SAGA) +* Crossref: They have [a very nice API][crapi], but they don't yet provide citation counts - but [they may soon][crmaybe]. +* [njsmith/sempervirens](https://github.com/njsmith/sempervirens) - a prototype for _gathering anonymous, opt-in usage data for open scientific software_ +* [Force11 Software Citation Working Group](https://github.com/force11/force11-scwg) - _...produce a consolidated set of citation principles in order to encourage broad adoption of a consistent policy for software citation across disciplines and venues_ + +## Missed? + +I'm sure I missed things. Let me know. 
+ +[scopus]: http://www.scopus.com/ +[schol]: https://scholar.google.com/ +[saga]: https://geodynamics.org/cig/projects/saga/ +[crapi]: https://github.com/CrossRef/rest-api-doc/blob/master/rest_api.md +[crmaybe]: https://github.com/CrossRef/rest-api-doc/issues/46 +[neil]: https://youtu.be/jMH7FTGqQEE?t=1h3m41s +[wssspe3]: http://wssspe.researchcomputing.org.uk/wssspe3/ +[news]: http://ropensci.github.io/biweekly/

    - - oai - an OAI-PMH client + + analogsea - an R client for the Digital Ocean API

    - - -

    oai is a general purpose client to work with any 'OAI-PMH' service. The 'OAI-PMH' protocol is described at http://www.openarchives.org/OAI/openarchivesprotocol.html. The main functions follow the OAI-PMH verbs:

    - -
      -
    • GetRecord
    • -
    • Identify
    • -
    • ListIdentifiers
    • -
    • ListMetadataFormats
    • -
    • ListRecords
    • -
    • ListSets
    • -
    - -

    The repo is at https://github.com/sckott/oai

    - -

    I will be using this in a number of packages I maintain that use OAI-PMH data services. If you try it, let me know what you think.

    - -

    This package is heading to rOpenSci soon: https://github.com/ropensci/onboarding/issues/19

    - -

    Here's a few usage examples:

    - -

    Install

    - -

    Is on CRAN now, but binaries may not be available yet.

    -
    install.packages("oai")
    -
    -

    Or install development version from GitHub

    -
    devtools::install_github("sckott/oai")
    -
    -

    Load oai

    -
    library("oai")
    -
    -

    Identify

    -
    id("http://oai.datacite.org/oai")
    -#>   repositoryName                     baseURL protocolVersion
    -#> 1   DataCite MDS http://oai.datacite.org/oai             2.0
    -#>           adminEmail    earliestDatestamp deletedRecord
    -#> 1 admin@datacite.org 2011-01-01T00:00:00Z    persistent
    -#>            granularity compression compression.1
    -#> 1 YYYY-MM-DDThh:mm:ssZ        gzip       deflate
    -#>                                      description
    -#> 1 oaioai.datacite.org:oai:oai.datacite.org:12425
    -
    -

    ListIdentifiers

    -
    list_identifiers(from = '2011-05-01T', until = '2011-09-01T')
    -#> <ListRecords> 925 X 6 
    -#> 
    -#>                    identifier            datestamp setSpec setSpec.1
    -#> 1  oai:oai.datacite.org:32153 2011-06-08T08:57:11Z     TIB  TIB.WDCC
    -#> 2  oai:oai.datacite.org:32200 2011-06-20T08:12:41Z     TIB TIB.DAGST
    -#> 3  oai:oai.datacite.org:32220 2011-06-28T14:11:08Z     TIB TIB.DAGST
    -#> 4  oai:oai.datacite.org:32241 2011-06-30T13:24:45Z     TIB TIB.DAGST
    -#> 5  oai:oai.datacite.org:32255 2011-07-01T12:09:24Z     TIB TIB.DAGST
    -#> 6  oai:oai.datacite.org:32282 2011-07-05T09:08:10Z     TIB TIB.DAGST
    -#> 7  oai:oai.datacite.org:32309 2011-07-06T12:30:54Z     TIB TIB.DAGST
    -#> 8  oai:oai.datacite.org:32310 2011-07-06T12:42:32Z     TIB TIB.DAGST
    -#> 9  oai:oai.datacite.org:32325 2011-07-07T11:17:46Z     TIB TIB.DAGST
    -#> 10 oai:oai.datacite.org:32326 2011-07-07T11:18:47Z     TIB TIB.DAGST
    -#> ..                        ...                  ...     ...       ...
    -#> Variables not shown: setSpec.2 (chr), setSpec.3 (chr)
    -
    -

    Count Identifiers

    -
    count_identifiers()
    -#>                           url   count
    -#> 1 http://oai.datacite.org/oai 6350706
    -
    -

    ListRecords

    -
    list_records(from = '2011-05-01T', until = '2011-08-15T')
    -#> <ListRecords> 126 X 46 
    -#> 
    -#>                    identifier            datestamp setSpec setSpec.1
    -#> 1  oai:oai.datacite.org:32153 2011-06-08T08:57:11Z     TIB  TIB.WDCC
    -#> 2  oai:oai.datacite.org:32200 2011-06-20T08:12:41Z     TIB TIB.DAGST
    -#> 3  oai:oai.datacite.org:32220 2011-06-28T14:11:08Z     TIB TIB.DAGST
    -#> 4  oai:oai.datacite.org:32241 2011-06-30T13:24:45Z     TIB TIB.DAGST
    -#> 5  oai:oai.datacite.org:32255 2011-07-01T12:09:24Z     TIB TIB.DAGST
    -#> 6  oai:oai.datacite.org:32282 2011-07-05T09:08:10Z     TIB TIB.DAGST
    -#> 7  oai:oai.datacite.org:32309 2011-07-06T12:30:54Z     TIB TIB.DAGST
    -#> 8  oai:oai.datacite.org:32310 2011-07-06T12:42:32Z     TIB TIB.DAGST
    -#> 9  oai:oai.datacite.org:32325 2011-07-07T11:17:46Z     TIB TIB.DAGST
    -#> 10 oai:oai.datacite.org:32326 2011-07-07T11:18:47Z     TIB TIB.DAGST
    -#> ..                        ...                  ...     ...       ...
    -#> Variables not shown: title (chr), creator (chr), creator.1 (chr),
    -#>      creator.2 (chr), creator.3 (chr), creator.4 (chr), creator.5 (chr),
    -#>      creator.6 (chr), creator.7 (chr), publisher (chr), date (chr),
    -#>      identifier.2 (chr), identifier.1 (chr), subject (chr), description
    -#>      (chr), description.1 (chr), contributor (chr), language (chr), type
    -#>      (chr), type.1 (chr), format (chr), format.1 (chr), rights (chr),
    -#>      subject.1 (chr), relation (chr), subject.2 (chr), subject.3 (chr),
    -#>      subject.4 (chr), setSpec.2 (chr), setSpec.3 (chr), format.2 (chr),
    -#>      subject.5 (chr), subject.6 (chr), subject.7 (chr), description.2
    -#>      (chr), description.3 (chr), description.4 (chr), description.5 (chr),
    -#>      title.1 (chr), relation.1 (chr), relation.2 (chr), contributor.1
    -#>      (chr)
    -
    -

    GetRecords

    -
    get_records(c("oai:oai.datacite.org:32255", "oai:oai.datacite.org:32325"))
    -#> <GetRecord> 2 X 23 
    -#> 
    -#>                   identifier            datestamp setSpec setSpec.1
    -#> 1 oai:oai.datacite.org:32255 2011-07-01T12:09:24Z     TIB TIB.DAGST
    -#> 2 oai:oai.datacite.org:32325 2011-07-07T11:17:46Z     TIB TIB.DAGST
    -#> Variables not shown: title (chr), creator (chr), creator.1 (chr),
    -#>      creator.2 (chr), creator.3 (chr), publisher (chr), date (chr),
    -#>      identifier.1 (chr), subject (chr), subject.1 (chr), description
    -#>      (chr), description.1 (chr), contributor (chr), language (chr), type
    -#>      (chr), type.1 (chr), format (chr), format.1 (chr), rights (chr)
    -
    -

    List MetadataFormats

    -
    list_metadataformats(id = "oai:oai.datacite.org:32348")
    -#> $`oai:oai.datacite.org:32348`
    -#>   metadataPrefix
    -#> 1         oai_dc
    -#> 2       datacite
    -#> 3   oai_datacite
    -#>                                                        schema
    -#> 1              http://www.openarchives.org/OAI/2.0/oai_dc.xsd
    -#> 2 http://schema.datacite.org/meta/nonexistant/nonexistant.xsd
    -#> 3              http://schema.datacite.org/oai/oai-1.0/oai.xsd
    -#>                             metadataNamespace
    -#> 1 http://www.openarchives.org/OAI/2.0/oai_dc/
    -#> 2      http://datacite.org/schema/nonexistant
    -#> 3     http://schema.datacite.org/oai/oai-1.0/
    -
    -

    List Sets

    -
    list_sets("http://oai.datacite.org/oai")
    -#> <ListSets> 1227 X 2 
    -#> 
    -#>                     setSpec
    -#> 1                REFQUALITY
    -#> 2                      ANDS
    -#> 3           ANDS.REFQUALITY
    -#> 4             ANDS.CENTRE-1
    -#> 5  ANDS.CENTRE-1.REFQUALITY
    -#> 6             ANDS.CENTRE-2
    -#> 7  ANDS.CENTRE-2.REFQUALITY
    -#> 8             ANDS.CENTRE-3
    -#> 9  ANDS.CENTRE-3.REFQUALITY
    -#> 10            ANDS.CENTRE-5
    -#> ..                      ...
    -#> Variables not shown: setName (chr)
    -
    + + + `analogsea` is now on CRAN. We started developing the pkg back in [May 2014][firstcomm], but just +now getting the first version on CRAN. It's a collaboration with [Hadley][hadley] and [Winston Chang][chang]. + +Most of `analogsea` package is for interacting with the [Digital Ocean API](https://developers.digitalocean.com/documentation/v2/), including: + +* Manage domains +* Manage ssh keys +* Get actions +* Manage images +* Manage droplets (servers) + +A number of convenience functions are included for doing tasks (e.g., resizing +a droplet) that aren't supported by Digital Ocean's API out of the box (i.e., +there's no API route for it). + +In addition to wrapping their API routes, we provide other functionality, e.g.: + +* execute shell commands on a droplet (server) +* execute R commands on a droplet +* install R +* install RStudio server +* install Shiny server + +Other functionality we're working on, not yet available: + +* install OpenCPU +* use `packrat` to move projects from local to server, and vice versa + +See also: two previous blog posts on this package [http://recology.info/2014/05/analogsea/](http://recology.info/2014/05/analogsea/) and [http://recology.info/2014/06/analogsea-v01/](http://recology.info/2014/06/analogsea-v01/) + +## Install + +Binaries are not yet on CRAN, but you can install from source. + + +```r +# install.packages("analogsea") # when binaries available +install.packages("analogsea", repos = "https://cran.r-project.org", type = "source") +``` + +Or install development version from GitHub + + +```r +devtools::install_github("sckott/analogsea") +``` + +Load `analogsea` + + +```r +library("analogsea") +``` + +## Etc. + +As this post is mostly to announce that this pkg is on CRAN now, I won't go through examples, but instead point you to the package [README][readme] and [vignette][vign] in which we cover +creating a Digital Ocean account, authenticating, and have many examples. + +## Feedback + +Let us know what you think. We'd love to hear about any problems, use cases, feature requests. + +[firstcomm]: https://github.com/sckott/analogsea/commit/b129164dd87969d2fc6bcf3b51576fe1da932fdb +[hadley]: http://had.co.nz/ +[chang]: https://github.com/wch/ +[readme]: https://github.com/sckott/analogsea/blob/master/README.md +[vign]: https://github.com/sckott/analogsea/blob/master/vignettes/doapi.Rmd +
    diff --git a/_site/page20/index.html b/_site/page20/index.html index f36fe43204..50c2bda524 100644 --- a/_site/page20/index.html +++ b/_site/page20/index.html @@ -61,338 +61,244 @@

    Recology

    - - On writing, sharing, collaborating, and hosting code for science + + Beyond academia

    - - -

    I recently engaged with a number of tweeps in response to my tweet:

    - -
    -

    Rule number 1 wrt science code: DO NOT post your code on your personal website

    -
    - -

    That tweet wasn't super clear, and it's difficult to convey my thoughts in a tweet. What I should have said was do post your code - ideally on Github/Bitbucket/etc. Here goes with a much longer version to explain what I meant. The tweet was just about where to host code, whereas the following is about more than that, but related.

    - -

    Code writing during analyses, etc.

    - -

    When you write code to do simulations, analyses, data manipulation, visualization - whatever it is - it helps to version your code. That is, not naming files like myfile_v1.r, myfile_v2.r, etc., but with versioning using version control systems (VCS) like git, svn, mercurial, etc. Although git will give you headaches during the learning process, it takes care of versioning your code for you, finding differences in different versions, helps you manage conflicts from different contributors, and allows you to restore that old code you accidentally deleted.

    - -

    And you don't have to use git or svn on a code hosting site - you can use git or svn locally on your own machine. However, there are many benefits to putting your code up on the interwebs.

    - -

    Collaborating on code

    - -

    Whenever you collaborate on code writing you have the extreme joy of dealing with conflicts. Perhaps you use Dropbox for collaborating on some code writing. Crap, now there is a line of code that messes up the analysis, and you don't know who put it there, and why it's there. Wouldn't it be nice to have a place to collect bugs in the code?

    - -

    All of these things become easy if you host code on a service such as Github. If you are already versioning your code with git you are in luck - all you need to do is create an account on github/bitbucket and push your code up. If not, you should definitely learn git.

    - -

    Hosting your code on Github (or Bitbucket, etc.) allows each collaborator to work separately on the code simultaneously, then merge their code together, while git helps you take care of merging. An awesome feature of git (and other VCS's) is branching. What the heck is that? Basically, you can create a complete copy of your git project, do any changes you want, then throw it away or merge it back in to your main branch. Pretty sweet.

    - -

    Sharing your code

    - -

    Whether sharing your code with a collaborator, or with the world, if you put code on a website created specifically for hosting code, I would argue your life would be easier. Groups like Github and Bitbucket have solved a lot of problems around versioning code, displaying it, etc., whereas your website (whether it be Google sites, Wordpress, Tumblr, etc.) can not say the same.

    - -

    It is becoming clear to many that open science has many benefits. For the sake of transparency and contributing to the public science good, I would argue that sharing your code is the right thing to do, especially given that most of us are publicly funded. However, even if you don't want to share your code publicly, you can get free private hosting with an academic discount on Github, and Bitbucket gives you private hosting for free.

    + -

    Contributing to the software you use

    + As ecologists, we often start graduate school worshiping the ivory tower of academia with its freedom to pursue important ecological questions. However, studies have shown that most of us do not end up in academia. Greater numbers of ecology graduates are leaving the ivory tower for non-academic career paths. But for many graduates, moving from an academic environment to a non-academic job may be difficult. In graduate school we are trained to work in a particular way, often with loose deadlines and unlimited intellectual freedom (within reason of course). The culture and expectations of the non-academic world may be quite different. What are the skills that you need in a government job, or in science journalism? How do you market yourself for a non-academic position? This is a timely topic because funding to academic ecologists is being cut, leaving fewer opportunities in the academic arena. In fact, an ESA Student Section survey found that an ESA 2013 session on non-academic career paths in ecology was the topic of greatest interest. -

    Much of the software you and I use in R, Python, etc. is likely hosted on a code hosting platform such as Github, Bitbucket, R-Forge, etc. Code gets better faster if its users report bugs and request features to the software authors. By creating an account on Github, for example, to host your own code, you can easily report bugs or request features where others are developing software you use. This is better than email as only those two people get the benefit of learning from the conversation - while engaging where the software is created, or on a related mailing list, helps everyone.

    +[Sandra Chung][sandra] and I organized an ESA lunchtime session on Tuesday the 6th with panelists from an array of non-academic careers to offer advice and share their experiences. Each panelist will speak briefly, introducing themselves and a bit about what they do. About half of the time will be reserved for an open discussion in which you all attending the session help decide what to talk about. -

    On long-term availability of code

    +You can find the description of the session at the ESA site [here][ses]. -

    Where is the best place to host your code in the long-term. Some may trust their own website over a company - a company can go out of business, be sold to another company and then be shut down, etc. However, code on personal websites can also be lost if a person moves institutions, etc. If you use a VCS, and host your code on Bitbucket/Github/Etc., even if they shut down, you will always have the same code that was up on their site, and you can host it on the newer awesome code hosting site. In addition, even if a company shuts down and you have to move your code, you are getting all the benefits as stated above.

    +**The details:** -

    Anyway...

    ++ When: Tuesday, August 6, 2013: 11:30 AM-1:15 PM ++ Where: 101B, Minneapolis Convention Center ++ Who (the panelists): + + [Virginia Gewin][virginia]: Independent science journalist (w/ work in Science/Nature/etc.) + + [Liz Neeley][liz]: Science communication/journalism at COMPASS (a non-profit org.) + + [Joe Simonis][joe]: Research scientist at the Lincoln Park Zoo (a non-profit org.) + + [Ted Hart][ted]: Soon to be statistician at NEON + + [Lael Goodman][lael]: Analyst w/ the Union of Concerned Scientists -

    My point is this: do post your code somewhere, even if on your own site, but I think you'll find that you and others can get the most out of your code if you host it on Bitbucket, Github, etc. Do tell me if you think I'm wrong and why.

    +**Get involved** -

    A few resources if you're so inclined

    +As a placeholder on the web for things related to Beyond Academia, and a place to find out more about the session at ESA, we started a wiki. Check it out [here][wiki]. Please do visit the wiki, and contribute your ideas for topics to discuss. In addition, we have a *Resources* page on the wiki [here][resources] to collect resources related to moving beyond academia. - +[ses]: http://eco.confex.com/eco/2013/webprogram/Session9083.html +[wiki]: http://beyondacademia.wikispaces.com/home +[resources]: http://beyondacademia.wikispaces.com/Resources +[sandra]: http://sandrachung.com/ +[joe]: http://www.lpzoo.org/conservation-science/resources/staff-bios/joseph-l-simonis-phd +[virginia]: http://www.virginiagewin.com/ +[ted]: http://emhart.github.io/ +[lael]: http://linkd.in/15NCg3j +[liz]: http://www.compassonline.org/staff/LizNeeley

    - - R to GeoJSON + + On writing, sharing, collaborating, and hosting code for science

    - - -

    UPDATE As you can see in Patrick's comment below you can convert to GeoJSON format files with rgdal as an alternative to calling the Ogre web API described below. See here for example code for converting to GeoJSON with rgdal.
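For completeness, a minimal sketch of that rgdal route might look like the following (paths and layer names are placeholders; it assumes your data are already readable as a Spatial*DataFrame):

```r
library("rgdal")

# read a shapefile (or KML, etc.) into a Spatial*DataFrame
shp <- readOGR(dsn = "~/path/to/shapefiles", layer = "pinucont")

# write it back out as GeoJSON
writeOGR(shp, dsn = "~/path/to/pinus.geojson", layer = "pinus", driver = "GeoJSON")
```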

    - -
    - -

    GitHub recently introduced the ability to render GeoJSON files on their site as maps here, and recently introduced here support for TopoJSON, an extension of GeoJSON that can be up to 80% smaller than GeoJSON, support for other file extensions (.topojson and .json), and you can embed the maps on other sites (so awesome). The underlying maps used on GitHub are Openstreet Maps.

    - -

    A recent blog post showed how to convert .shp or .kml files to GeoJSON to then upload to GitHub here. The approach used Ruby on the command line locally to convert the geospatial files to GeoJSON.

    - -

    Can we do this in R? Perhaps others have already done this, but there's more than one way to do anything, no?

    - -

    I'm not aware of a converter to GeoJSON within R, but there is a web service that can do this, called Ogre. The service lets you POST a file, which then converts to GeoJSON and gives it back to you. Ogre accepts many different file formats: BNA, CSV, DGN, DXF, zipped shapefiles, GeoConcept, GeoJSON, GeoRSS, GML, GMT, KML, MapInfo, and VRT.

    - -

    We can use the Ogre API to upload a local geospatial file of various formats and get the GeoJSON back, then put it up on GitHub, and they render the map for you. Sweetness.

    - -

    So here's the protocol.

    - -
    - -

    1. Load httr. What is httr? For those not in the know it is a simpler wrapper around RCurl, a curl interface for R.

    - -
    # install.packages('httr')
    -library(httr)
    - -

    2. Here is a function to convert your geospatial files to GeoJSON (with roxygen docs).

    - -
    togeojson <- function(file, writepath = "~") {
    -    url <- "http://ogre.adc4gis.com/convert"
    -    tt <- POST(url, body = list(upload = upload_file(file)))
    -    out <- content(tt, as = "text")
    -    fileConn <- file(writepath)
    -    writeLines(out, fileConn)
    -    close(fileConn)
    -}
    - -

    3. Convert a file to GeoJSON

    - -

    KML

    - -

    In the first line I specify the location of the file on my machine. In the second line the function togeojson reads in the file, sends the file to the API endpoint http://ogre.adc4gis.com/convert, collects the returned GeoJSON object, and saves the GeoJSON to a file that you specify. Here we are converting a KML file with point occurrences (data collected from USGS's BISON service).

    - -
    file <- "~/github/sac/rgeojson/acer_spicatum.kml"
    -togeojson(file, "~/github/sac/rgeojson/acer_spicatum.geojson")
    - -

    Shapefiles

    + -

    Here, we are converting a zip file containing shape files for Pinus contorta (data collected from the USGS here).

    + I recently engaged with a number of tweeps in response to my tweet: -
    file <- "~/github/sac/rgeojson/pinucont.zip"
    -togeojson(file, "~/github/sac/rgeojson/pinus.geojson")
    +> Rule number 1 wrt science code: DO NOT post your code on your personal website -

    4. Then commit and push to GitHub. And this is what they look like on GitHub

    +That tweet wasn't super clear, and it's difficult to convey my thoughts in a tweet. What I should have said was do post your code - ideally on Github/Bitbucket/etc. Here goes with a much longer version to explain what I meant. The tweet was just about where to host code, whereas the following is about more than that, but related. -

    Acer spicatum distribution (points)

    +### Code writing during analyses, etc. - +When you write code to do simulations, analyses, data manipulation, visualization - whatever it is - it helps to version your code. That is, not naming files like *myfile_v1.r*, *myfile_v2.r*, etc., but with versioning using version control systems (VCS) like [git][git], [svn][svn], [mercurial][mc], etc. Although git will give you headaches during the learning process, it takes care of versioning your code for you, finding differences in different versions, helps you manage conflicts from different contributors, and allows you to restore that old code you accidentally deleted. - +And you don't have to use git or svn on a code hosting site - you can use git or svn locally on your own machine. However, there are many benefits to putting your code up on the interwebs. -

    Pinus contorta distribution (polygons)

    +### Collaborating on code - +Whenever you collaborate on code writing you have the extreme joy of dealing with conflicts. Perhaps you use Dropbox for collaborating on some code writing. Crap, now there is a line of code that messes up the analysis, and you don't know who put it there, and why it's there. Wouldn't it be nice to have a place to collect bugs in the code? - +All of these things become easy if you host code on a service such as Github. If you are already versioning your code with git you are in luck - all you need to do is create an account on github/bitbucket and push your code up. If not, you should definitely learn git. -
    +Hosting your code on Github (or Bitbucket, etc.) allows each collaborator to work separately on the code simultaneously, then merge their code together, while git helps you take care of merging. An awesome feature of git (and other VCS's) is branching. What the heck is that? Basically, you can create a complete copy of your git project, do any changes you want, then throw it away or merge it back in to your main branch. Pretty sweet. -

    If you want, you can clone a repo from my account. Then do the below. (of course, you must have git installed, and have a GitHub account...)

    +### Sharing your code + +Whether sharing your code with a collaborator, or with the world, if you put code on a website created specifically for hosting code, I would argue your life would be easier. Groups like Github and Bitbucket have solved a lot of problems around versioning code, displaying it, etc., whereas your website (whether it be Google sites, Wordpress, Tumblr, etc.) can not say the same. -

    First, fork my rgeojson repo here to your GitHub account.

    +It is becoming clear to many that open science has many benefits. For the sake of transparency and contributing to the public science good, I would argue that sharing your code is the right thing to do, especially given that most of us are publicly funded. However, even if you don't want to share your code publicly, you can get free private hosting with an [academic discount on Github](https://github.com/edu), and Bitbucket gives you private hosting for free. -

    Second, in your terminal/command line...

    +### Contributing to the software you use -
    git clone https://github.com/<yourgithubusername>/rgeojson.git
    -cd rgeojson
    +Much of the software you and I use in R, Python, etc. is likely hosted on a code hosting platform such as Github, Bitbucket, R-Forge, etc. Code gets better faster if its users report bugs and request features to the software authors. By creating an account on Github, for example, to host your own code, you can easily report bugs or request features where others are developing software you use. This is better than email as only those two people get the benefit of learning from the conversation - while engaging where the software is created, or on a related mailing list, helps everyone. -

    Third, in R specify the location of either the KML file or the zipped shape files, then call togeojson function to convert the KML file to a GeoJSON file (which should output a file acer_spicatum.geojson)

    +### On long-term availability of code -
    file <- "/path/to/acer_spicatum.kml"
    -togeojson(file, "~/path/to/write/to/acer_spicatum.geojson")
    +Where is the best place to host your code in the long-term. Some may trust their own website over a company - a company can go out of business, be sold to another company and then be shut down, etc. However, code on personal websites can also be lost if a person moves institutions, etc. If you use a VCS, and host your code on Bitbucket/Github/Etc., even if they shut down, you will always have the same code that was up on their site, and you can host it on the newer awesome code hosting site. In addition, even if a company shuts down and you have to move your code, you are getting all the benefits as stated above. -

    Fourth, back in the terminal...

    +### Anyway... -
    git add acer_spicatum.geojson
    -git commit -a -m 'some cool commit message'
    -git push
    +My point is this: do post your code somewhere, even if on your own site, but I think you'll find that you and others can get the most out of your code if you host it on Bitbucket, Github, etc. Do tell me if you think I'm wrong and why. -

    Fifth, go to your rgeojson repo on GitHub and click on the acer_spicatum.geojson file, and the map should render.

    +### A few resources if you're so inclined -
    ++ [Push, Pull, Fork: GitHub for Academics](http://hybridpedagogy.com/Journal/files/GitHub_for_Academics.html) ++ Carl Boettiger has some interesting posts on [research workflow](http://carlboettiger.info/2012/05/06/research-workflow.html) and [github issues as a research to do list](http://carlboettiger.info/2012/12/06/github-issues-tracker:-the-perfect-research-todo-list) ++ Do have a look at [Karthik Ram's][kr] paper on how git can facilitate greater reproducibility and transparency in science [here][karthik]. ++ Github is posting a bunch of videos on Youtube that are quite helpful for learning how to use git and Github [here][gityou] ++ Git GUIs make using git easier: + + [SourceTree](http://www.sourcetreeapp.com/) + + [GitBox](http://gitboxapp.com/) + + [Github's git GUI](http://mac.github.com/) -

    Look for this functionality to come to the rbison and rgbif R packages soon. Why is that cool? Think of the workflow: Query for species occurrence data in the BISON or GBIF databases, convert the results to a GeoJSON file, push to GitHub, and you have an awesome interactive map on the web. Not too bad eh.
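Roughly, that workflow could look like the sketch below. The occurrence records here are made up as a stand-in for rbison/rgbif results, and it assumes Ogre picks up the lat/lon columns from a CSV (one of the formats it accepts):

```r
# made-up occurrence records standing in for rbison/rgbif output
occ <- data.frame(
  species = "Acer spicatum",
  lat = c(44.5, 45.1, 46.2),
  lon = c(-72.1, -71.8, -70.9)
)
write.csv(occ, "~/github/sac/rgeojson/acer_occ.csv", row.names = FALSE)

# convert with the togeojson() function defined above, then git add/commit/push
togeojson("~/github/sac/rgeojson/acer_occ.csv", "~/github/sac/rgeojson/acer_occ.geojson")
```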

    +[git]: http://git-scm.com/ +[svn]: http://subversion.apache.org/ +[karthik]: http://www.scfbm.org/content/8/1/7/abstract +[kr]: http://inundata.org/ +[mc]: http://mercurial.selenic.com/wiki/ +[gityou]: https://www.youtube.com/channel/UCP7RrmoueENv9TZts3HXXtw

    - - Put some cushions on the sofa + + R to GeoJSON

    - - -

    I posted earlier this week about sofa (here), introducing a package I started recently that interacts with CouchDB from R. There's been a fair amount of response at least in terms of page views, so I'll take that as a sign to keep going.

    - -

    One thing that would be nice while you are CouchDB-ing is to interact with local and remote databases. I have incorporated the ability to interact with remote CouchDB databases in many of the functions, not all though. The remote data stores supported right now are Cloudant and Iriscouch.

    - -

    Hadley Wickham suggested that a package called sofa should have something called cushion. And so it must be. It's just a small function, called cushion, which puts a cushion on the sofa, or in reality, sets up your authentication for remote data stores. cushion just writes your username and password to your options list and then the functions look for your authentication details via getOption. Of course these auth details aren't stored permanently - when you restart R you have to write them again to options. You can store them permanently in your .Rprofile file if you like, usually at ~/.Rprofile, by putting in an entry like options(cloudant.pwd = "mycoolpassword").
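That permanent setup is just a couple of lines in ~/.Rprofile; the cloudant.pwd option name comes from the example above, while cloudant.name is my assumption by analogy:

```r
# in ~/.Rprofile, so it runs at R startup
options(cloudant.name = "yourusername",   # name option assumed by analogy
        cloudant.pwd = "mycoolpassword")

# the sofa functions then read these back with, e.g.
getOption("cloudant.pwd")
```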

    - -
    - -

    Load sofa

    - -
    # install.packages('devtools'); library(devtools); install_github('sofa', 'sckott')
    -library(sofa)
    - -
    - -

    Put a cushion on the sofa - that is, save your auth details

    - -
    cushion(iriscouch_name = "yourusername", iriscouch_pwd = "yourpwd", 
    -    cloudant_name = "yourusername", cloudant_pwd = "yourpwd")
    - -
    - -

    Ping each server

    - -
    sofa_ping()
    - -
    $couchdb
    -[1] "Welcome"
    +    
     
    -$version
    -[1] "1.2.1"
    + **UPDATE** As you can see in Patrick's comment below you can convert to GeoJSON format files with rgdal as an alternative to calling the Ogre web API described below. See [here](https://github.com/patperu/write2geojson/blob/master/write-geojson.R) for example code for converting to GeoJSON with rgdal. -
    sofa_ping("iriscouch")
    +*************** -
    $couchdb
    -[1] "Welcome"
    +GitHub recently introduced the ability to render [GeoJSON][geojson] files on their site as maps [here][post1], and recently introduced [here][post2] support for [TopoJSON][topojson], an extension of GeoJSON can be up to 80% smaller than GeoJSON, support for other file extensions (`.topojson` and `.json`), and you can embed the maps on other sites (so awesome). The underlying maps used on GitHub are [Openstreet Maps][openstreet]. 
     
    -$uuid
    -[1] "f1cb5d2e881bcb529d2eb2d04f548683"
    +A recent blog post showed how to convert .shp or .kml files to GeoJSON to then upload to GitHub [here][ruby]. The approach used Ruby on the command line locally to convert the geospatial files to GeoJSON. 
     
    -$version
    -[1] "1.3.0"
    +Can we do this in R? Perhaps others have already done this, but there's more than one way to do anything, no? 
     
    -$vendor
    -$vendor$version
    -[1] "1.3.0r1"
    +I'm not aware of a converter to GeoJSON within R, but there is a web service that can do this, called [Ogre][ogre]. The service lets you `POST` a file, which then converts to GeoJSON and gives it back to you. Ogre accepts many different file formats: BNA, CSV, DGN, DXF, zipped shapefiles, GeoConcept, GeoJSON, GeoRSS, GML, GMT, KML, MapInfo, and VRT. 
     
    -$vendor$name
    -[1] "Iris Couch"
    +We can use the Ogre API to upload a local geospatial file of various formats and get the GeoJSON back, then put it up on GitHub, and they render the map for you. Sweetness. -
    sofa_ping("cloudant")
    +So here's the protocol. -
    $couchdb
    -[1] "Welcome"
    +***************
     
    -$version
    -[1] "1.0.2"
    +### 1. Load httr. What is httr? For those not in the know it is a simpler wrapper around RCurl, a curl interface for R.
     
    -$cloudant_build
    -[1] "1323"
    -
    +{% highlight r %} +# install.packages('httr') +library(httr) +{% endhighlight %} -

    Now we'll do similar tasks on a local and two remote databases (cloudant and iriscouch)

    +### 2. Here is a function to convert your geospatial files to GeoJSON (with roxygen docs). -

    Create a database

    -
    sofa_createdb(dbname = "hello_world")  # local
    +{% highlight r %} +togeojson <- function(file, writepath = "~") { + url <- "http://ogre.adc4gis.com/convert" + tt <- POST(url, body = list(upload = upload_file(file))) + out <- content(tt, as = "text") + fileConn <- file(writepath) + writeLines(out, fileConn) + close(fileConn) +} +{% endhighlight %} -
      ok 
    -TRUE 
    +### 3. Convert a file to GeoJSON -
    sofa_createdb(dbname = "hello_world", "iriscouch")  # iriscouch
    +**KML** -
      ok 
    -TRUE 
    +In the first line I specify the location of the file on my machine. In the second line the function `togeojson` reads in the file, sends the file to the API endpoint *http://ogre.adc4gis.com/convert*, collects the returned GeoJSON object, and saves the GeoJSON to a file that you specify. Here we are converting a KML file with point occurrences (data collected from [USGS's BISON service](http://bison.usgs.ornl.gov/)). -
    sofa_createdb(dbname = "hello_world", "cloudant")  # cloudant
    -
      ok 
    -TRUE 
    +{% highlight r %} +file <- "~/github/sac/rgeojson/acer_spicatum.kml" +togeojson(file, "~/github/sac/rgeojson/acer_spicatum.geojson") +{% endhighlight %} -
    -

    Listing your databases is a simple task

    +**Shapefiles** -

    List your databases

    +Here, we are converting a zip file containing shape files for *Pinus contorta* (data collected from the USGS [here](http://esp.cr.usgs.gov/data/little/). -
    sofa_listdbs()  # local
    -
     [1] "_replicator"                "_users"                    
    - [3] "alm_couchdb"                "alm_db"                    
    - [5] "cheese"                     "dudedb"                    
    - [7] "example"                    "foobar"                    
    - [9] "foodb"                      "hello_world"               
    -[11] "helloworld"                 "rplos_db"                  
    -[13] "shit"                       "shitty"                    
    -[15] "shitty2"                    "sofadb"                    
    -[17] "test_suite_db"              "test_suite_db/with_slashes"
    -[19] "test_suite_reports"         "testr2couch"               
    -[21] "twitter_db"                
    +{% highlight r %} +file <- "~/github/sac/rgeojson/pinucont.zip" +togeojson(file, "~/github/sac/rgeojson/pinus.geojson") +{% endhighlight %} -
    sofa_listdbs("iriscouch")  # iriscouch
    -
    [1] "_replicator" "_users"      "foobar"      "hello_world" "helloworld" 
    -[6] "mustache"    "stuff"       "thing"      
    +### 4. Then commit and push to GitHub. And this is what they look like on GitHub -
    sofa_listdbs("cloudant")  # cloudant
    +*Acer spicatum* distribution (points) -
    [1] "dudedb"         "foobar"         "hello_world"    "helloworld"    
    -[5] "mustache"       "thingsandstuff"
    + + -
    +*Pinus contorta* distribution (polygons) -

    Write a document to a database

    + + -
    doc <- "{\"name\":\"dude\",\"icecream\":\"rocky road\"}"
    -sofa_writedoc(dbname = "helloworld", doc = doc)  # local
    +*************** -
    $ok
    -[1] TRUE
    +If you want, you can clone a repo from my account. Then do the below. (of course, you must have git installed, and have a GitHub account...)
     
    -$id
    -[1] "da2b0d1eb457dc764a6283fa59001606"
    +First, fork my `rgeojson` repo [here](https://github.com/sckott/rgeojson) to your GitHub account.
     
    -$rev
    -[1] "1-5406480672da172726810767e7d0ead3"
    +Second, in your terminal/command line... -
    sofa_writedoc("iriscouch", dbname = "helloworld", doc = doc)  # iriscouch
    +{% highlight bash %} +git clone https://github.com//rgeojson.git +cd rgeojson +{% endhighlight %} -
    $ok
    -[1] TRUE
    +Third, in R specify the location of either the KML file or the zipped shape files, then call `togeojson` function to convert the KML file to a GeoJSON file (which should output a file *acer_spicatum.geojson*)
     
    -$id
    -[1] "0c0858b75a81c464a74119ca2400135d"
     
    -$rev
    -[1] "1-5406480672da172726810767e7d0ead3"
    +{% highlight r %} +file <- "/path/to/acer_spicatum.kml" +togeojson(file, "~/path/to/write/to/acer_spicatum.geojson") +{% endhighlight %} -
    sofa_writedoc("cloudant", dbname = "helloworld", doc = doc)  # cloudant
    -
    $ok
    -[1] TRUE
    +Fourth, back in the terminal...
     
    -$id
    -[1] "b77808eae8ae8d79ae78a373bf5b02d1"
    +{% highlight bash %}
    +git add acer_spicatum.geojson
    +git commit -a -m 'some cool commit message'
    +git push
    +{% endhighlight %}
     
    -$rev
    -[1] "1-5406480672da172726810767e7d0ead3"
    +Fifth, go to your *rgeojson* repo on GitHub and click on the *acer_spicatum.geojson* file, and the map should render. -
    +*************** -

    There's lots more you can do of course...

    +Look for this functionality to come to the [rbison][rbison] and [rgbif][rgbif] R packages soon. Why is that cool? Think of the workflow: Query for species occurrence data in the BISON or GBIF databases, convert the results to a GeoJSON file, push to GitHub, and you have an awesome interactive map on the web. Not too bad eh. -

    Thoughts? Feelings? Criticism?

    +[post1]: https://github.com/blog/1528-there-s-a-map-for-that +[post2]: https://github.com/blog/1541-geojson-rendering-improvements +[openstreet]: http://www.openstreetmap.org/ +[ruby]: http://ben.balter.com/2013/06/26/how-to-convert-shapefiles-to-geojson-for-use-on-github/ +[geojson]: http://en.wikipedia.org/wiki/GeoJSON +[topojson]: https://github.com/mbostock/topojson +[ogre]: http://ogre.adc4gis.com/ +[rbison]: https://github.com/ropensci/rbison +[rgbif]: https://github.com/ropensci/rgbif
    diff --git a/_site/page21/index.html b/_site/page21/index.html index 62cca2fc27..5d17992f4b 100644 --- a/_site/page21/index.html +++ b/_site/page21/index.html @@ -59,6 +59,273 @@

    Recology

      +
    +

    + + Put some cushions on the sofa + +

    + + + + I posted earlier this week about sofa ([here][blog]), introducing a package I started recently that interacts with CouchDB from R. There's been a fair amount of response at least in terms of page views, so I'll take that as a sign to keep going. + +One thing that would be nice while you are CouchDB-ing is to interact with local *and* remote databases. I have incorporated the ability to interact with remote CouchDB databases in many of the functions, not all though. The remote data stores supported right now are Cloudant and Iriscouch. + +Hadley Wickham suggested that a package called `sofa` should have something called `cushion`. And so it must be. It's just a small function, called `cushion`, which puts a cushion on the sofa, or in reality, sets up your authentication for remote data stores. `cushion` just writes your username and password to your options list and then the functions look for your authentication details via `getOption`. Of course these auth details aren't stored permanently - when you restart R you have to write them again to options. You can store them permanently in your `.Rprofile` file if you like, usally at ~/.Rprofile by putting in entry like options(cloudant.pwd = "mycoolpassword"). + +*************** + +### Load sofa + +{% highlight r %} +# install.packages('devtools'); library(devtools); install_github('sofa', 'sckott') +library(sofa) +{% endhighlight %} + + +*************** + +### Put a cushion on the sofa - that is, save your auth details + + +{% highlight r %} +cushion(iriscouch_name = "yourusername", iriscouch_pwd = "yourpwd", + cloudant_name = "yourusername", cloudant_pwd = "yourpwd") +{% endhighlight %} + + +*************** + +### Ping each server + +{% highlight r %} +sofa_ping() +{% endhighlight %} + + + +{% highlight text %} +$couchdb +[1] "Welcome" + +$version +[1] "1.2.1" +{% endhighlight %} + + + +{% highlight r %} +sofa_ping("iriscouch") +{% endhighlight %} + + + +{% highlight text %} +$couchdb +[1] "Welcome" + +$uuid +[1] "f1cb5d2e881bcb529d2eb2d04f548683" + +$version +[1] "1.3.0" + +$vendor +$vendor$version +[1] "1.3.0r1" + +$vendor$name +[1] "Iris Couch" +{% endhighlight %} + + + +{% highlight r %} +sofa_ping("cloudant") +{% endhighlight %} + + + +{% highlight text %} +$couchdb +[1] "Welcome" + +$version +[1] "1.0.2" + +$cloudant_build +[1] "1323" +{% endhighlight %} + + +*************** + +Now we'll do similar tasks on a local and two remote databases (cloudant and iriscouch) + +### Create a database + +{% highlight r %} +sofa_createdb(dbname = "hello_world") # local +{% endhighlight %} + + + +{% highlight text %} + ok +TRUE +{% endhighlight %} + + + +{% highlight r %} +sofa_createdb(dbname = "hello_world", "iriscouch") # iriscouch +{% endhighlight %} + + + +{% highlight text %} + ok +TRUE +{% endhighlight %} + + + +{% highlight r %} +sofa_createdb(dbname = "hello_world", "cloudant") # cloudant +{% endhighlight %} + + + +{% highlight text %} + ok +TRUE +{% endhighlight %} + + +*************** + +Listing your databases is a simple task + +### List your databases + +{% highlight r %} +sofa_listdbs() # local +{% endhighlight %} + + + +{% highlight text %} + [1] "_replicator" "_users" + [3] "alm_couchdb" "alm_db" + [5] "cheese" "dudedb" + [7] "example" "foobar" + [9] "foodb" "hello_world" +[11] "helloworld" "rplos_db" +[13] "shit" "shitty" +[15] "shitty2" "sofadb" +[17] "test_suite_db" "test_suite_db/with_slashes" +[19] "test_suite_reports" "testr2couch" +[21] "twitter_db" +{% endhighlight %} + + + +{% highlight r %} 
+sofa_listdbs("iriscouch") # iriscouch +{% endhighlight %} + + + +{% highlight text %} +[1] "_replicator" "_users" "foobar" "hello_world" "helloworld" +[6] "mustache" "stuff" "thing" +{% endhighlight %} + + + +{% highlight r %} +sofa_listdbs("cloudant") # cloudant +{% endhighlight %} + + + +{% highlight text %} +[1] "dudedb" "foobar" "hello_world" "helloworld" +[5] "mustache" "thingsandstuff" +{% endhighlight %} + + +*************** + +### Write a document to a database + +{% highlight r %} +doc <- "{\"name\":\"dude\",\"icecream\":\"rocky road\"}" +sofa_writedoc(dbname = "helloworld", doc = doc) # local +{% endhighlight %} + + + +{% highlight text %} +$ok +[1] TRUE + +$id +[1] "da2b0d1eb457dc764a6283fa59001606" + +$rev +[1] "1-5406480672da172726810767e7d0ead3" +{% endhighlight %} + + + +{% highlight r %} +sofa_writedoc("iriscouch", dbname = "helloworld", doc = doc) # iriscouch +{% endhighlight %} + + + +{% highlight text %} +$ok +[1] TRUE + +$id +[1] "0c0858b75a81c464a74119ca2400135d" + +$rev +[1] "1-5406480672da172726810767e7d0ead3" +{% endhighlight %} + + + +{% highlight r %} +sofa_writedoc("cloudant", dbname = "helloworld", doc = doc) # cloudant +{% endhighlight %} + + + +{% highlight text %} +$ok +[1] TRUE + +$id +[1] "b77808eae8ae8d79ae78a373bf5b02d1" + +$rev +[1] "1-5406480672da172726810767e7d0ead3" +{% endhighlight %} + +*************** + +There's lots more you can do of course... + +Thoughts? Feelings? Criticism? + +[blog]: http://sckott.github.io/2013/06/couch/ + +
    +
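A small aside on the `options`/`getOption` mechanism described above: here is a minimal sketch of what permanent `.Rprofile` entries could look like. Only `cloudant.pwd` is named in the post; the other option names below are hypothetical placeholders, so check the package docs for the names `cushion` actually reads.

{% highlight r %}
# Hypothetical ~/.Rprofile entries - only cloudant.pwd is mentioned in the post,
# the other option names are placeholders for illustration
options(cloudant.name = "yourusername",
        cloudant.pwd = "mycoolpassword",
        iriscouch.name = "yourusername",
        iriscouch.pwd = "yourpwd")

# later, inside sofa's functions, credentials are looked up like this
getOption("cloudant.pwd")
{% endhighlight %}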

    @@ -68,38 +335,44 @@

    -

    Have you heard of DataONE? It stands for the Data Observation Network for Earth, and I am involved in the Community Education and Engagement working group at DataONE. We try to communicate about data, data management, and similar things to scientists and other DataONE stakeholders.

    + Have you heard of [DataONE](http://www.dataone.org/)? It stands for the Data Observation Network for Earth, and I am involved in the [Community Education and Engagement working group](http://www.dataone.org/working_groups/community-education-and-engagement) at DataONE. We try to communicate about data, data management, and similar things to scientists and other DataONE *stakeholders*. -

    At our last meeting, we decided to start a blog aggregator to pull in to one place blog posts about data, data management, and related topics. Those reading this blog have likely heard of R-Bloggers - and there are many more aggregator blogs. We are calling this blog aggregator Coffeehouse - as it's sort of a place to gather to talk/read about ideas. Check it out here. If you blog about data management think about adding your blog to Coffeehouse - go to the Add your blog page to do so. A screenshot:

    +At our last meeting, we decided to start a blog aggregator to pull in to one place blog posts about data, data management, and related topics. Those reading this blog have likely heard of [R-Bloggers](http://www.r-bloggers.com/) - and there are many more aggregator blogs. We are calling this blog aggregator **Coffeehouse** - as it's sort of a place to gather to talk/read about ideas. Check it out [here][coffee]. If you blog about data management think about adding your blog to Coffeehouse - go to the [*Add your blog*][addblog] page to do so. A screenshot: -

    +![](/public/img/coffeehouse.png) -
    +******************** -

    The blogs already added to Coffeehouse:

    +The blogs already added to Coffeehouse: -

    Data Conservancy
    + Data Conservancy
    Data Pub
    DataCite
    Research Remix
    - The Signal: Digital Preservation

    + The Signal: Digital Preservation -
    +******************** -

    The tech/styling details:

    +The tech/styling details: -
      -
    • As is obvious we are using Wordpress.org, with the Magazine Basic theme.
    • -
    • We don't accept comments - when someone clicks on the comments button it sends them back to the original post. This is on purpose so that the authors of the post get the comments on their own site.
    • -
    • On the top of each post there is an alert to tell you the post is syndicated, and gives a link to the original post. You can close this alert if it's annoying to you.
    • -
• Style - we have strived to use clean and simple styling to make for a nice reading experience. A cluttered website makes reading painful. We also use the Twitter Bootstrap WP plugin.
    • -
    • Icons: done using the FontAwesome Wordpress Plugin.
    • -
    • Aggregating posts is done using the FeedWordPress plugin.
    • -
• The add your blog form: using the Ninja forms plugin
    • -
    • Analytics: using the Gauges WP plugin
    • -
++ As is obvious we are using Wordpress.org, with the Magazine Basic theme. ++ We don't accept comments - when someone clicks on the comments button it sends them back to the original post. This is on purpose so that the authors of the post get the comments on their own site. ++ On the top of each post there is an alert to tell you the post is syndicated, and gives a link to the original post. You can close this alert if it's annoying to you. ++ Style - we have strived to use clean and simple styling to make for a nice reading experience. A cluttered website makes reading painful. We also use the [Twitter Bootstrap WP plugin][boot]. ++ Icons: done using the [FontAwesome Wordpress Plugin][fawp]. ++ Aggregating posts is done using the [FeedWordPress plugin][fwp]. ++ The add your blog form: using the [Ninja forms plugin][ninja] ++ Analytics: using the [Gauges WP plugin][gauges] -

    That's it. Let us know if you have any thoughts/comments.

    +That's it. Let us know if you have any thoughts/comments. + +[coffee]: https://coffeehouse.dataone.org/ +[fawp]: https://github.com/rachelbaker/Font-Awesome-WordPress-Plugin +[addblog]: https://coffeehouse.dataone.org/add-your-blog/ +[fwp]: http://feedwordpress.radgeek.com/ +[ninja]: http://wpninjas.com/ninja-forms/ +[boot]: http://www.icontrolwp.com/our-wordpress-plugins/wordpress-twitter-bootstrap-css-plugin-home/ +[gauges]: http://wordpress.org/plugins/gauges/

    @@ -112,153 +385,204 @@

    -

    It is getting easier to get data directly into R from the web. Often R packages that retrieve data from the web return useful R data structures to users like a data.frame. This is a good thing of course to make things user friendly.

    + It is getting easier to get data directly into R from the web. Often R packages that retrieve data from the web return useful R data structures to users like a data.frame. This is a good thing of course to make things user friendly. + +However, what if you want to drill down into the data that's returned from a query to a database in R? What if you want to get that nice data.frame in R, but you think you may want to look at the raw data later? The raw data from web queries are often JSON or XML data. This type of data, especially JSON, can be easily stored in schemaless so-called NoSQL databases, and queried later. + +A brief aside: What are JSON and XML? This is what JSON looks like (ps if you ever wonder if your JSON is correct, go [here](http://jsonlint.com/)): -

    However, what if you want to drill down into the data that's returned from a query to a database in R? What if you want to get that nice data.frame in R, but you think you may want to look at the raw data later? The raw data from web queries are often JSON or XML data. This type of data, especially JSON, can be easily stored in schemaless so-called NoSQL databases, and queried later.

    +{% highlight bash %} +{ + "name": "joe", + "hobby": "codemonkey", + "lives": [ + { + "city": "San Jose", + "state": "CA" + } + ] +} +{% endhighlight %} -

    A brief aside: What are JSON and XML? This is what JSON looks like (ps if you ever wonder if your JSON is correct, go here):

    +This is what XML looks like: -
    {
    -  "name": "joe",
    -  "hobby": "codemonkey",
    -  "lives": [
    -      {
    -          "city": "San Jose",
    -          "state": "CA"
    -      }
    -  ]
    -}
    +{% highlight bash %} + + joe + codemonkey + + San Jose + CA + +{% endhighlight %} -

    This is what XML looks like:

    +But don't worry if it looks complicated - the project I talk about below, sofa, tries to make the interface to JSON and XML easy. Web APIs almost always return either JSON or XML, so this is the raw data. -
    <?xml version="1.0" encoding="UTF-8" ?>
    -  <name>joe</name>
    -    <hobby>codemonkey</hobby>
    -    <lives>
    -        <city>San Jose</city>
    -        <state>CA</state>
    -    </lives>
    +So here's the use case I imagine, or workflow: -

    But don't worry if it looks complicated - the project I talk about below, sofa, tries to make the interface to JSON and XML easy. Web APIs almost always return either JSON or XML, so this is the raw data.

    ++ Query a database on the web, and choose to write the raw data to a local database. ++ Do whatever you want with the output R object - analyze, visualize, etc. ++ Now you want to go back and search through some of the raw data. But, that query took an hour. Since you wrote it to a local database, you can search the data. ++ If you hadn't written it locally, you would have to make a new web call. -

    So here's the use case I imagine, or workflow:

    +Note that if you are doing calls to web APIs that get small amounts of data you don't need to worry too much as you can easily just do the call again. -
      -
    • Query a database on the web, and choose to write the raw data to a local database.
    • -
    • Do whatever you want with the output R object - analyze, visualize, etc.
    • -
    • Now you want to go back and search through some of the raw data. But, that query took an hour. Since you wrote it to a local database, you can search the data.
    • -
    • If you hadn't written it locally, you would have to make a new web call.
    • -
    +I've started an R package to interact with the NoSQL database [CouchDB][couch]. CouchDB is a schemaless database that speaks JSON, so you can store JSON and get back JSON (don't worry XML, we got you covered - we can just wrap the XML in JSON before putting it in CouchDB). What's especially cool is you can interact with CouchDB via [a RESTful API][restapi]. CouchDB doesn't have full text search built in (though you can build map-reduce *Views*, basically preset queries on the database), so I added functions (and docs to help) to interact with the [CouchDB River plugin][couchriver] for [Elasticsearch][elastic], which provides powerful full text search via an API interface. But nevermind the tech details - all this just means you can search on the full text of your stored data. -

    Note that if you are doing calls to web APIs that get small amounts of data you don't need to worry too much as you can easily just do the call again.

    +There are plenty of databases you can interact with from R, so why CouchDB? For one, it makes a lot of sense to write to a NoSQL database since this blog post is dealing with a use case writing JSON, which isn't a good fit for databases like MySQL, SQLite, PostgreSQL, etc. ([though postgres allows you to write JSON][postgres]). It didn't have to be CouchDB, but at least to me it seems relatively easy to install, you can interact with it via an HTTP API (if you're into that, which I am), and it has a nice web interface (navigate to [http://localhost:5984/_utils/](http://localhost:5984/_utils/) after starting `couchdb`). -

    I've started an R package to interact with the NoSQL database CouchDB. CouchDB is a schemaless database that speaks JSON, so you can store JSON and get back JSON (don't worry XML, we got you covered - we can just wrap the XML in JSON before putting it in CouchDB). What's especially cool is you can interact with CouchDB via a RESTful API. CouchDB doesn't have full text search built in (though you can build map-reduce Views, basically preset queries on the database), so I added functions (and docs to help) to interact with the CouchDB River plugin for Elasticsearch, which provides powerful full text search via an API interface. But nevermind the tech details - all this just means you can search on the full text of your stored data.
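To make the wrap-the-XML-in-JSON idea concrete, here is a minimal sketch (not part of sofa) that stores an XML snippet as a field of a JSON document; it assumes the RJSONIO package is available:

{% highlight r %}
library(RJSONIO)

# an XML snippet kept as a plain string
xml_string <- "<lives><city>San Jose</city><state>CA</state></lives>"

# wrap it in a JSON document so CouchDB can store it alongside regular fields
doc <- toJSON(list(name = "joe", raw_xml = xml_string))
doc
# the resulting JSON string could then be handed to something like sofa_writedoc()
{% endhighlight %}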

    +Is this for the casual R user? Probably not. But, I imagine there are R users out there that want some more flexibility when it comes to interacting with web data in R. It is nice and tidy to get back an R data.frame from a web call, but having the raw data at your fingertips could be super powerful. I'll describe using an R package to interact with a web database with `sofa` baked in, and discuss a bit about the functions within `sofa`. -

    There are plenty of databases you can interact with from R, so why CouchDB? For one, it makes a lot of sense to write to a NoSQL database since this blog post is dealing with a use case writing JSON, which isn't a good fit for databases like MySQL, SQLite, PostgreSQL, etc. (though postgres allows you to write JSON). It didn't have to be CouchDB, but at least to me it seems relatively easy to install, you can interact with it via an HTTP API (if you're into that, which I am), and it has a nice web interface (navigate to http://localhost:5984/_utils/ after starting couchdb).

    +*************** -

    Is this for the casual R user? Probably not. But, I imagine there are R users out there that want some more flexibility when it comes to interacting with web data in R. It is nice and tidy to get back an R data.frame from a web call, but having the raw data at your fingertips could be super powerful. I'll describe using an R package to interact with a web database with sofa baked in, and discuss a bit about the functions within sofa.

    +### First start CouchDB in your terminal -
+You can do this from any directory. See [here](http://couchdb.apache.org/) for instructions on how to install CouchDB. -

    First start CouchDB in your terminal

    +```bash +couchdb +``` -

You can do this from any directory. See here for instructions on how to install CouchDB.

    -
    couchdb
    -
    -

    Then start elasticsearch in your terminal

    +### Then start elasticsearch in your terminal -

    See here for instructions on how to install Elasticsearch and the River CouchDB plugin.

    -
    cd /usr/local/elasticsearch
    +See [here](https://github.com/sckott/sofa) for instructions on how to install Elasticsearch and the River CouchDB plugin.
    +
    +```bash
    +cd /usr/local/elasticsearch
     bin/elasticsearch -f
    -
    -
    +``` + +*************** + +### Install sofa + +{% highlight r %} +# Uncomment these lines if you don't have these packages installed +# install.packages('devtools') library(devtools) install_github('sofa', +# 'schamberlain') install_github('alm', 'ropensci', ref='couch') +library(sofa) +library(alm) +{% endhighlight %} + + +*************** -

    Install sofa

    +### Simultaneously write data to CouchDB along with API calls using the alm package to get altmetrics data on PLoS papers. Ping to make sure CouchDB is on -
    # Uncomment these lines if you don't have these packages installed
    -# install.packages('devtools') library(devtools) install_github('sofa',
    -# 'schamberlain') install_github('alm', 'ropensci', ref='couch')
    -library(sofa)
    -library(alm)
    +{% highlight r %} +sofa_ping() +{% endhighlight %} -
    -

    Simultaneously write data to CouchDB along with API calls using the alm package to get altmetrics data on PLoS papers. Ping to make sure CouchDB is on

    -
    sofa_ping()
    +{% highlight text %} + couchdb version +"Welcome" "1.2.1" +{% endhighlight %} -
      couchdb   version 
    -"Welcome"   "1.2.1" 
    +*************** -
    +### Create a new database -

    Create a new database

    +{% highlight r %} +sofa_createdb(dbname = "alm_db") +{% endhighlight %} -
    sofa_createdb(dbname = "alm_db")
    -
      ok 
    -TRUE 
    -
    +{% highlight text %} + ok +TRUE +{% endhighlight %} -

    Write couchdb database name to options

    +*************** -
    options(couch_db_name = "alm_db")
    +### Write couchdb database name to options -
    +{% highlight r %} +options(couch_db_name = "alm_db") +{% endhighlight %} -

    List the databases

    -
    sofa_listdbs()
    +*************** -
     [1] "_replicator"                "_users"                    
    - [3] "alm_couchdb"                "alm_db"                    
    - [5] "dudedb"                     "example"                   
    - [7] "poop"                       "rplos_db"                  
    - [9] "shit"                       "shitty"                    
    -[11] "shitty2"                    "test_suite_db"             
    -[13] "test_suite_db/with_slashes" "test_suite_reports"        
    -[15] "testr2couch"                "twitter_db"                
    +### List the databases -
    +{% highlight r %} +sofa_listdbs() +{% endhighlight %} -

    Search for altmetrics normally, w/o writing to a database

    -
    head(alm(doi = "10.1371/journal.pone.0029797"))
    -
              .id pdf html shares groups comments likes citations total
    +{% highlight text %}
    + [1] "_replicator"                "_users"                    
    + [3] "alm_couchdb"                "alm_db"                    
    + [5] "dudedb"                     "example"                   
    + [7] "poop"                       "rplos_db"                  
    + [9] "shit"                       "shitty"                    
    +[11] "shitty2"                    "test_suite_db"             
    +[13] "test_suite_db/with_slashes" "test_suite_reports"        
    +[15] "testr2couch"                "twitter_db"                
    +{% endhighlight %}
    +
    +***************
    +
    +### Search for altmetrics normally, w/o writing to a database
    +
    +{% highlight r %}
    +head(alm(doi = "10.1371/journal.pone.0029797"))
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +          .id pdf html shares groups comments likes citations total
     1   bloglines  NA   NA     NA     NA       NA    NA         0     0
     2   citeulike  NA   NA      1     NA       NA    NA        NA     1
     3    connotea  NA   NA     NA     NA       NA    NA         0     0
     4    crossref  NA   NA     NA     NA       NA    NA         6     6
     5      nature  NA   NA     NA     NA       NA    NA         4     4
    -6 postgenomic  NA   NA     NA     NA       NA    NA         0     0
    +6 postgenomic NA NA NA NA NA NA 0 0 +{% endhighlight %} + -
    +*************** -

    Search for altmetrics normally, while writing to a database

    +### Search for altmetrics normally, while writing to a database -
    head(alm(doi = "10.1371/journal.pone.0029797", write2couch = TRUE))
    +{% highlight r %} +head(alm(doi = "10.1371/journal.pone.0029797", write2couch = TRUE)) +{% endhighlight %} -
              .id pdf html shares groups comments likes citations total
    +
    +
    +{% highlight text %}
    +          .id pdf html shares groups comments likes citations total
     1   bloglines  NA   NA     NA     NA       NA    NA         0     0
     2   citeulike  NA   NA      1     NA       NA    NA        NA     1
     3    connotea  NA   NA     NA     NA       NA    NA         0     0
     4    crossref  NA   NA     NA     NA       NA    NA         6     6
     5      nature  NA   NA     NA     NA       NA    NA         4     4
    -6 postgenomic  NA   NA     NA     NA       NA    NA         0     0
    +6 postgenomic NA NA NA NA NA NA 0 0 +{% endhighlight %} + + +*************** -
    +### Make lots of calls, and write them simultaneously -

    Make lots of calls, and write them simultaneously

    +{% highlight r %} +# install_github('rplos', 'ropensci') +library(rplos) +dois <- searchplos(terms = "evolution", fields = "id", limit = 100) +out <- alm(doi = as.character(dois[, 1]), write2couch = TRUE) +lapply(out[1:2], head) +{% endhighlight %} -
    # install_github('rplos', 'ropensci')
    -library(rplos)
    -dois <- searchplos(terms = "evolution", fields = "id", limit = 100)
    -out <- alm(doi = as.character(dois[, 1]), write2couch = TRUE)
    -lapply(out[1:2], head)
    -
    $`01`
    +
    +{% highlight text %}
    +$`01`
               .id pdf html shares groups comments likes citations total
     1   bloglines  NA   NA     NA     NA       NA    NA         0     0
     2   citeulike  NA   NA      0     NA       NA    NA        NA     0
    @@ -274,253 +598,187 @@ 

    Make lots of calls, and write them simultaneously

    3 connotea NA NA NA NA NA NA 0 0 4 crossref NA NA NA NA NA NA 2 2 5 nature NA NA NA NA NA NA 0 0 -6 postgenomic NA NA NA NA NA NA 0 0
    +6 postgenomic NA NA NA NA NA NA 0 0 +{% endhighlight %} -
    -

    Writing data to CouchDB does take a bit longer

    +*************** -
    system.time(alm(doi = as.character(dois[, 1])[1:60], write2couch = FALSE))
    +### Writing data to CouchDB does take a bit longer -
       user  system elapsed 
    -  1.739   0.016   4.554 
    +{% highlight r %} +system.time(alm(doi = as.character(dois[, 1])[1:60], write2couch = FALSE)) +{% endhighlight %} -
    system.time(alm(doi = as.character(dois[, 1])[1:60], write2couch = TRUE))
    -
       user  system elapsed 
    -  3.579   0.062   6.460 
    -
    +{% highlight text %} + user system elapsed + 1.739 0.016 4.554 +{% endhighlight %} -

    Search using elasticsearch

    -

    tell elasticsearch to start indexing your database

    -
    elastic_start(dbname = "alm_db")
    +{% highlight r %} +system.time(alm(doi = as.character(dois[, 1])[1:60], write2couch = TRUE)) +{% endhighlight %} -
    $ok
    -[1] TRUE
    -
    -

    Search your database

    +{% highlight text %} + user system elapsed + 3.579 0.062 6.460 +{% endhighlight %} -
    out <- elastic_search(dbname = "alm_db", q = "twitter", parse_ = TRUE)
    -out$hits$total
    -
    [1] 679
    +*************** -
    +### Search using elasticsearch +#### tell elasticsearch to start indexing your database -

    Using views

    +{% highlight r %} +elastic_start(dbname = "alm_db") +{% endhighlight %} -

    Write a view - here letting key be the default of null

    -
    sofa_view_put(dbname = "alm_db", design_name = "myview", value = "doc.baseurl")
    -
    $ok
    +{% highlight text %}
    +$ok
     [1] TRUE
    +{% endhighlight %}
     
    -$id
    -[1] "_design/myview"
    -
    -$rev
    -[1] "1-e7c17cff1b96e4595c3781da53e16ad8"
    - -
    - -

    Get info on your new view

    - -
    sofa_view_get(dbname = "alm_db", design_name = "myview")
    - -
    $`_id`
    -[1] "_design/myview"
     
    -$`_rev`
    -[1] "1-e7c17cff1b96e4595c3781da53e16ad8"
    -
    -$views
    -$views$foo
    -                                    map 
    -"function(doc){emit(null,doc.baseurl)}" 
    - -
    +*************** -

    Get data using a view

    +#### Search your database -
    out <- sofa_view_search(dbname = "alm_db", design_name = "myview")
    -length(out$rows)  # 160 results
    +{% highlight r %} +out <- elastic_search(dbname = "alm_db", q = "twitter", parse_ = TRUE) +out$hits$total +{% endhighlight %} -
    [1] 161
    -
    sapply(out$rows, function(x) x$value)[1:5]  # the values, just the API call URLs
    -
    [1] "http://alm.plos.org/api/v3/articles"
    -[2] "http://alm.plos.org/api/v3/articles"
    -[3] "http://alm.plos.org/api/v3/articles"
    -[4] "http://alm.plos.org/api/v3/articles"
    -[5] "http://alm.plos.org/api/v3/articles"
    +{% highlight text %} +[1] 679 +{% endhighlight %} -
    -

    Delete the view

    +*************** -
    sofa_view_del(dbname = "alm_db", design_name = "myview")
    +### Using views -
    [1] ""
    +#### Write a view - here letting key be the default of null -
    +{% highlight r %} +sofa_view_put(dbname = "alm_db", design_name = "myview", value = "doc.baseurl") +{% endhighlight %} -

    What now?

    -

    Well, if no one uses this, then probably nothing. Though, if people think this could be useful:

    -
      -
• It would be cool to make easy hooks into any package making web calls to allow users to write data to CouchDB if they choose to, sort of like the example above with rplos (see the sketch after this list).
    • -
    • Perhaps automate some of the setup for CouchDB for users, making system calls so they don't have to.
    • -
    • Performance: As shown above, simultaneously writing data to CouchDB takes longer than not doing so - removing this time difference will make writing to couch more palatable.
    • -
    - -
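As referenced in the first item above, here is a rough sketch of what such a hook could look like inside a client package. The function name and the use of RCurl/RJSONIO are assumptions for illustration only; the one piece taken from sofa is `sofa_writedoc()`, used as shown earlier in the post.

{% highlight r %}
library(RCurl)
library(RJSONIO)

# Hypothetical wrapper: fetch raw JSON from a web API, optionally cache it in CouchDB
fetch_and_store <- function(url, write2couch = FALSE, dbname = "raw_calls") {
  raw <- getURL(url)                            # raw JSON as returned by the API
  if (write2couch) {
    sofa_writedoc(dbname = dbname, doc = raw)   # stash the raw call in CouchDB
  }
  fromJSON(raw)                                 # return the parsed R object to the user
}
{% endhighlight %}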

    What do you think?

    - -

    What is your reaction to this post? Do you have a need for this sort of tool? Do you have similar use cases that could be addressed with sofa?

    - -
    - -
    -

    - - Fylopic, an R wrapper to Phylopic - -

    - - - -

    What is PhyloPic?

    - -

    PhyloPic is an awesome new service - I'll let the creator, Mike Keesey, explain what it is (paraphrasing here):

    - -
    -

    PhyloPic stores silhouette images of organisms, and each image is associated with taxonomic names, and stores the taxonomy of all taxa, allowing searching by taxonomic names. Anyone can submit silhouettes to PhyloPic.

    -
    - -

    What is a silhouette? It's like this:

    - -

    A silhouette

    - -

    by Gareth Monger

    - -

    What makes PhyloPic not just awesome, but super awesome? All or most images are licensed under Creative Commons licenses. This means you can use the silhouettes without having to ask or pay - just attribute.

    - -
    - -

    What is fylopic?

    - -

The idea behind Fylopic is to create modular bits and pieces (i.e., functions) to allow you to add silhouettes to not only ggplot2 plots, but base plots as well. That is, you can simply load fylopic in your R session, and add some silhouettes to your phylogeny, or your barchart, etc. - in short, fylopic is meant to be a helper in your workflow to add silhouettes to visualizations.

    - -

Some people prefer base plots while others prefer ggplot2 plots (me!), so it would be nice to have both options. Phylogenies at the moment render faster in base plots. I don't yet have implementations for base plots, but they will come soon, or you can send a pull request to add it.

    - -

One interesting use case could be to get a set of silhouettes, then get a phylogeny for taxa associated with the silhouettes using the NCBI taxonomy, but it's not easily available yet (though I may be able to use Ben Morris' phylocommons soon). This isn't doable yet, so in the example below the function make_phylo creates a phylogeny using ape::rcoal.

    +{% highlight text %} +$ok +[1] TRUE -

    You could also do the reverse -> you have a phylogeny and then you could search Phylopic for silhouettes.

    +$id +[1] "_design/myview" -
    +$rev +[1] "1-e7c17cff1b96e4595c3781da53e16ad8" +{% endhighlight %} -

    Info

    -

    Check out the Phylopic website here, and Phylopic API developer documentation here.

    +*************** -

    Also check out Ben Morris' Python wrapper to Phylopic here.

    +#### Get info on your new view -
    +{% highlight r %} +sofa_view_get(dbname = "alm_db", design_name = "myview") +{% endhighlight %} -

    What can you do with fylopic?

    -
    -

    Install fylopic

    +{% highlight text %} +$`_id` +[1] "_design/myview" -
    install.packages("devtools")
    -library(devtools)
    -install_github("fylopic", "sckott")
    +$`_rev` +[1] "1-e7c17cff1b96e4595c3781da53e16ad8" -
    library(fylopic, quietly = TRUE)
    +$views +$views$foo + map +"function(doc){emit(null,doc.baseurl)}" +{% endhighlight %} -
    -

    Plot a phylogeny with silhouettes at the tips

    +*************** -

Here, I search for names based on keyword Homo sapiens - which returns many matching codes. With those results we search for any silhouettes associated with those codes. Then we download images. Finally, make a phylogeny with the silhouettes at the tips. Note that in this example the phylogeny is just a random coalescent tree made using ape::rcoal - obviously, in the real world you'd want to do something more useful.

    +#### Get data using a view -
    ## search on Homo sapiens
    -searchres <- search_text(text = "Homo sapiens", options = "names")
    +{% highlight r %}
    +out <- sofa_view_search(dbname = "alm_db", design_name = "myview")
    +length(out$rows)  # 160 results
    +{% endhighlight %}
     
    -### which returns UUIDs
    -searchres[1:3]
    -
    [1] "74aea16b-666b-497a-b2cb-72201ad75a8e"
    -[2] "1ee65cf3-53db-4a52-9960-a9f7093d845d"
    -[3] "cc9ad8ee-3a82-4add-8d50-bc78f4ff6956"
    -
    ## search for images based on the UUIds
    -output <- search_images(uuid = searchres, options = c("pngFiles", "credit", 
    -    "canonicalName"))
    +{% highlight text %}
    +[1] 161
    +{% endhighlight %}
     
    -### we got eight matches
    -output
    -
    $`15444b9c-f17f-4d6e-89b5-5990096bcfb0`
    -$`15444b9c-f17f-4d6e-89b5-5990096bcfb0`$supertaxa
    -[1] "e547cd01-7dd1-495b-8239-52cf9971a609"
    -[2] "bd88f674-6976-4cb2-a46e-e6a12a8ba463"
     
    +{% highlight r %}
    +sapply(out$rows, function(x) x$value)[1:5]  # the values, just the API call URLs
    +{% endhighlight %}
     
    -$`fedf0e5f-f20a-442c-accf-eb84a3af8c6b`
    -$`fedf0e5f-f20a-442c-accf-eb84a3af8c6b`$supertaxa
    -[1] "e547cd01-7dd1-495b-8239-52cf9971a609"
    -[2] "bd88f674-6976-4cb2-a46e-e6a12a8ba463"
     
     
    -$`a88d3a4c-44d3-409e-87b6-516bd188c709`
    -$`a88d3a4c-44d3-409e-87b6-516bd188c709`$supertaxa
    -[1] "e547cd01-7dd1-495b-8239-52cf9971a609"
    -[2] "bd88f674-6976-4cb2-a46e-e6a12a8ba463"
    +{% highlight text %}
    +[1] "http://alm.plos.org/api/v3/articles"
    +[2] "http://alm.plos.org/api/v3/articles"
    +[3] "http://alm.plos.org/api/v3/articles"
    +[4] "http://alm.plos.org/api/v3/articles"
    +[5] "http://alm.plos.org/api/v3/articles"
    +{% endhighlight %}
     
     
    -$`d88164ec-3152-444b-b41c-4757a344a764`
    -$`d88164ec-3152-444b-b41c-4757a344a764`$supertaxa
    -[1] "9c6af553-390c-4bdd-baeb-6992cbc540b1"
    +***************
     
    +#### Delete the view
     
    -$`da5eaeb7-1ed2-4b2e-ad4a-49993881d706`
    -$`da5eaeb7-1ed2-4b2e-ad4a-49993881d706`$supertaxa
    -[1] "9c6af553-390c-4bdd-baeb-6992cbc540b1"
    +{% highlight r %} +sofa_view_del(dbname = "alm_db", design_name = "myview") +{% endhighlight %} -
    ## download images
    -myobjs <- get_image(uuids = output, size = "128")
     
    -## make the phylogeny
    -make_phylo(pngobj = myobjs)
    -

    center

    +{% highlight text %} +[1] "" +{% endhighlight %} -
    -

    Plot a silhouette behind a plot

    +*************** -

    Notice in the below example that you can use normal ggplot2 syntax, and simply add another layer (add_phylopic from fylopic) to the plot.

    +## What now? -
    library(ggplot2)
    -img <- get_image("27356f15-3cf8-47e8-ab41-71c6260b2724", size = "512")[[1]]
    -qplot(x = Sepal.Length, y = Sepal.Width, data = iris, geom = "point") + add_phylopic(img)
    +Well, if no one uses this, then probably nothing. Though, if people think this could be useful: -

    center

    ++ It would be cool to make easy hooks into any package making web calls to allow users to write data to CouchDB if they choose to, sort of like the example above with rplos. ++ Perhaps automate some of the setup for CouchDB for users, making system calls so they don't have to. ++ Performance: As shown above, simultaneously writing data to CouchDB takes longer than not doing so - removing this time difference will make writing to couch more palatable. -
    +## What do you think? -

    What's next?

    +What is your reaction to this post? Do you have a need for this sort of tool? Do you have similar use cases that could be addressed with `sofa`? -

    This is a side project, so if anyone has interest in helping please do contribute code, report bugs, request features, etc.

+[couchriver]: https://github.com/elasticsearch/elasticsearch-river-couchdb/blob/master/README.md +[elastic]: http://www.elasticsearch.org/ +[restapi]: http://docs.couchdb.org/en/latest/api-basics.html +[couch]: http://couchdb.apache.org/ +[r4couch]: https://github.com/wactbprot/R4CouchDB +[postgres]: http://wiki.postgresql.org/wiki/What's_new_in_PostgreSQL_9.2#JSON_datatype
    diff --git a/_site/page22/index.html b/_site/page22/index.html index 7f00128e2f..ec3d0b75d1 100644 --- a/_site/page22/index.html +++ b/_site/page22/index.html @@ -61,575 +61,761 @@

    Recology

    - - BISON USGS species occurrence data + + Fylopic, an R wrapper to Phylopic

    - + -

The USGS recently released a way to search for and get species occurrence records for the USA. The service is called BISON (Biodiversity Information Serving Our Nation). The service has a web interface for human interaction in a browser, and two APIs (application programming interfaces) to allow machines to interact with their database. One of the APIs allows you to search and retrieve data, and the other gives back maps as either a heatmap or a species occurrence map. The latter is more appropriate for working in a browser, so I'll leave that to the web app folks.

    + ## What is PhyloPic? -

The Core Science Analytics and Synthesis (CSAS) program of the US Geological Survey is responsible for BISON, and is the US node of the Global Biodiversity Information Facility (GBIF). BISON data is nested within that of GBIF, but has (or will have?) additional data not in GBIF, as described on their About page:

    +PhyloPic is an awesome new service - I'll let the creator, [Mike Keesey](http://tmkeesey.net/), explain what it is (paraphrasing here): -
    -

    BISON has been initiated with the 110 million records GBIF makes available from the U.S. and is integrating millions more records from other sources each year

    -
    +> PhyloPic stores silhouette images of organisms, and each image is associated with taxonomic names, and stores the taxonomy of all taxa, allowing searching by taxonomic names. Anyone can submit silhouettes to PhyloPic. -

    Have a look at their Data providers and Statistics tabs on the BISON website, which list where data comes from and how many searches and downloads have been done on each data provider.

    +What is a silhouette? It's like this: -

We (rOpenSci) started an R package to interact with the BISON search API >> rbison. You may be thinking, but if the data in BISON is also in GBIF, why bother making another R package for BISON? Good question. As I just said, BISON will have some data GBIF won't have. Also, the services (search API and map service) are different from those of GBIF.

    +![A silhouette](http://phylopic.org/assets/images/submissions/bedd622a-4de2-4067-8c70-4aa44326d229.128.png) -

    Check out the package on GitHub here https://github.com/ropensci/rbison.

    +*by Gareth Monger* -

    Here is a quick run through of some things you can do with rbison.

    -
    +What makes PhyloPic not just awesome, but super awesome? All or most images are licensed under [Creative Commons licenses](http://creativecommons.org/). This means you can use the silhouettes without having to ask or pay - just attribute. -

Install rbison

    +*************** -
    # Install rbison from GitHub using devtools, uncomment to install
    -# install.packages('devtools') library(devtools) install_github('rbison',
    -# 'ropensci')
    -library(rbison)
    +## What is fylopic? -
+The idea behind Fylopic is to create modular bits and pieces (i.e., functions) to allow you to add silhouettes to not only ggplot2 plots, but base plots as well. That is, you can simply load fylopic in your R session, and add some silhouettes to your phylogeny, or your barchart, etc. - in short, `fylopic` is meant to be a helper in your workflow to add silhouettes to visualizations. -

    Search the BISON database for, of course, bison

+Some people prefer base plots while others prefer ggplot2 plots (me!), so it would be nice to have both options. Phylogenies at the moment render faster in base plots. I don't yet have implementations for base plots, but they will come soon, or you can send a pull request to add it. -
    # Do the search
    -out <- bison(species = "Bison bison", type = "scientific_name", start = 0, count = 10)
+One interesting use case could be to get a set of silhouettes, then get a phylogeny for taxa associated with the silhouettes using the NCBI taxonomy, but it's not easily available yet (though I may be able to use [Ben Morris' phylocommons](https://github.com/bendmorris/phylocommons) soon). This isn't doable yet, so in the example below the function `make_phylo` creates a phylogeny using `ape::rcoal`.
     
    -# Check that the returned object is the right class ('bison')
    -class(out)
    +You could also do the reverse -> you have a phylogeny and then you could search Phylopic for silhouettes. -
    [1] "bison"
    +*************** -

    Get a summary of the data

    +## Info -
    bison_data(out)
    +Check out the Phylopic website [here](http://phylopic.org/), and Phylopic API developer documentation [here](http://phylopic.org/api/). -
      total observation fossil specimen unknown
    -1   761          30      4      709      18
    +Also check out Ben Morris' Python wrapper to Phylopic [here](https://github.com/bendmorris/python-phylopic). -

    Summary by counties (just the first 6 rows)

    +*************** -
    head(bison_data(input = out, datatype = "counties"))
    +## What can you do with fylopic? -
      record_id total county_name      state
    -1     48295     7    Lipscomb      Texas
    -2     41025    15      Harney     Oregon
    -3     49017     8    Garfield       Utah
    -4     35031     2    McKinley New Mexico
    -5     56013     1     Fremont    Wyoming
    -6     40045     2       Ellis   Oklahoma
    +*************** -

    Summary of states

    +#### Install fylopic -
    bison_data(input = out, datatype = "states")
    +{% highlight r %} +install.packages("devtools") +library(devtools) +install_github("fylopic", "sckott") +{% endhighlight %} -
          record_id total county_fips
    -1    Washington     1          53
    -2         Texas     8          48
    -3    New Mexico     8          35
    -4          Iowa     1          19
    -5       Montana     9          30
    -6       Wyoming   155          56
    -7        Oregon    15          41
    -8      Oklahoma    14          40
    -9        Kansas    10          20
    -10      Arizona     1          04
    -11       Alaska    29          02
    -12         Utah    16          49
    -13     Colorado    17          08
    -14     Nebraska     1          31
    -15 South Dakota    61          46
    -
    -

    Map the results

    +{% highlight r %} +library(fylopic, quietly = TRUE) +{% endhighlight %} + + +*************** + +#### Plot a phylogeny with silhouettes at the tips + +Here, I search for names based on keyword *Homo sapiens* - which returns many matche codes. With those results we search for any silhouettes associated with those codes. Then we download images. Finally, make a phylogeny with the silhouettes at the tips. Note that in this eample the phylogeny is just a random coalescent tree made using `ape::rcoal` - obviously, in the real world you'd want to do something more useful. + + +{% highlight r %} +## search on Homo sapiens +searchres <- search_text(text = "Homo sapiens", options = "names") + +### which returns UUIDs +searchres[1:3] +{% endhighlight %} + + + +{% highlight text %} +[1] "74aea16b-666b-497a-b2cb-72201ad75a8e" +[2] "1ee65cf3-53db-4a52-9960-a9f7093d845d" +[3] "cc9ad8ee-3a82-4add-8d50-bc78f4ff6956" +{% endhighlight %} + + + +{% highlight r %} + +## search for images based on the UUIds +output <- search_images(uuid = searchres, options = c("pngFiles", "credit", + "canonicalName")) + +### we got eight matches +output +{% endhighlight %} -
    # Search for Ursus americanus (american black bear)
    -out <- bison(species = "Ursus americanus", type = "scientific_name", start = 0, 
    -    count = 200)
     
    -# Sweet, got some data
    -bison_data(out)
    -
      total observation fossil specimen literature unknown centroid
    -1  3792          59    125     3522         47      39       78
    +{% highlight text %} +$`15444b9c-f17f-4d6e-89b5-5990096bcfb0` +$`15444b9c-f17f-4d6e-89b5-5990096bcfb0`$supertaxa +[1] "e547cd01-7dd1-495b-8239-52cf9971a609" +[2] "bd88f674-6976-4cb2-a46e-e6a12a8ba463" -

    Make some maps! Note that right now the county and state maps just plot the conterminous lower 48. The map of individual occurrences shows the lower 48 + Alaska

    -
    # By county
    -bisonmap(out, tomap = "county")
    +$`fedf0e5f-f20a-442c-accf-eb84a3af8c6b` +$`fedf0e5f-f20a-442c-accf-eb84a3af8c6b`$supertaxa +[1] "e547cd01-7dd1-495b-8239-52cf9971a609" +[2] "bd88f674-6976-4cb2-a46e-e6a12a8ba463" -

    center

    -
    # By state
    -bisonmap(out, tomap = "state")
    +$`a88d3a4c-44d3-409e-87b6-516bd188c709` +$`a88d3a4c-44d3-409e-87b6-516bd188c709`$supertaxa +[1] "e547cd01-7dd1-495b-8239-52cf9971a609" +[2] "bd88f674-6976-4cb2-a46e-e6a12a8ba463" -

    center

    -
    # Individual locations
    -bisonmap(out)
    +$`d88164ec-3152-444b-b41c-4757a344a764` +$`d88164ec-3152-444b-b41c-4757a344a764`$supertaxa +[1] "9c6af553-390c-4bdd-baeb-6992cbc540b1" -
    ## Rendering map...plotting 199 points
    -

    center

    +$`da5eaeb7-1ed2-4b2e-ad4a-49993881d706` +$`da5eaeb7-1ed2-4b2e-ad4a-49993881d706`$supertaxa +[1] "9c6af553-390c-4bdd-baeb-6992cbc540b1" +{% endhighlight %} -
    -

    When plotting occurrences, you can pass additional arguments into the bisonmap function.

    -

    For example, you can jitter the points

    +{% highlight r %} -
    bisonmap(input = out, geom = geom_jitter)
    +## download images +myobjs <- get_image(uuids = output, size = "128") -
    ## Rendering map...plotting 199 points
    +## make the phylogeny +make_phylo(pngobj = myobjs) +{% endhighlight %} -

    center

    +![center](/public/img/2013-06-01-fylopic/unnamed-chunk-1.png) -

    And you can specify by how much you want the points to jitter (here an extreme example to make it obvious)

    -
    library(ggplot2)
    -bisonmap(input = out, geom = geom_jitter, jitter = position_jitter(width = 5))
    +*************** -
    ## Rendering map...plotting 199 points
    +#### Plot a silhouette behind a plot -

    center

    +Notice in the below example that you can use normal `ggplot2` syntax, and simply add another layer (`add_phylopic` from `fylopic`) to the plot. -
    -

    Let us know if you have any feature requests or find bugs at our GitHub Issues page.

    +{% highlight r %} +library(ggplot2) +img <- get_image("27356f15-3cf8-47e8-ab41-71c6260b2724", size = "512")[[1]] +qplot(x = Sepal.Length, y = Sepal.Width, data = iris, geom = "point") + add_phylopic(img) +{% endhighlight %} + +![center](/public/img/2013-06-01-fylopic/unnamed-chunk-2.png) + + +*************** + +## What's next? + +This is a side project, so if anyone has interest in helping please do contribute code, report bugs, request features, etc.

    - - Scholarly metadata in R + + BISON USGS species occurrence data

    - + -

    Scholarly metadata - the meta-information surrounding articles - can be super useful. Although metadata does not contain the full content of articles, it contains a lot of useful information, including title, authors, abstract, URL to the article, etc.

+ The USGS recently released a way to search for and get species occurrence records for the USA. The service is called [BISON](http://bison.usgs.ornl.gov/) (Biodiversity Information Serving Our Nation). The service has [a web interface](http://bison.usgs.ornl.gov/) for human interaction in a browser, and [two APIs](http://bison.usgs.ornl.gov/services.html) (application programming interfaces) to allow machines to interact with their database. One of the APIs allows you to search and retrieve data, and the other gives back maps as either a heatmap or a species occurrence map. The latter is more appropriate for working in a browser, so I'll leave that to the web app folks. -

One of the largest sources of metadata is provided via the Open Archives Initiative Protocol for Metadata Harvesting or OAI-PMH. Many publishers provide their metadata through their own endpoint, and implement the standard OAI-PMH methods: GetRecord, Identify, ListIdentifiers, ListMetadataFormats, ListRecords, and ListSets. Many providers use OAI-PMH, including DataCite, Dryad, and PubMed.
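For anyone unfamiliar with OAI-PMH, requests are plain HTTP calls with a `verb` parameter naming one of those six methods. Here is a minimal sketch using RCurl - the base URL below is a placeholder, so substitute a real provider's OAI-PMH endpoint:

{% highlight r %}
library(RCurl)

# every OAI-PMH request is just base URL + ?verb=<one of the six methods>
oai_base <- "http://example.org/oai"  # placeholder endpoint

identify_xml <- getForm(oai_base, verb = "Identify")      # who is this provider?
records_xml  <- getForm(oai_base, verb = "ListRecords",
                        metadataPrefix = "oai_dc")        # harvest Dublin Core records
{% endhighlight %}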

+The Core Science Analytics and Synthesis (CSAS) program of the US Geological Survey is responsible for BISON, and is the US node of the Global Biodiversity Information Facility (GBIF). BISON data is nested within that of GBIF, but has (or will have?) additional data not in GBIF, as described on their *About* page: -

Some data-/article-providers provide their metadata via their own APIs. For example, Nature Publishing Group provides their own metadata API here in a non-OAI-PMH format; you can get PLoS metadata through their search API, and the BHL (see below) provides their own custom metadata service.

    +> BISON has been initiated with the 110 million records GBIF makes available from the U.S. and is integrating millions more records from other sources each year -

    In addition, CrossRef provides a number of metadata search services: metadata search and openurl.

    +Have a look at their *Data providers* and *Statistics* tabs on the BISON website, which list where data comes from and how many searches and downloads have been done on each data provider. -

    What about the other publishers? (please tell me if I'm wrong about these three)

+We (rOpenSci) started an R package to interact with the BISON search API >> `rbison`. You may be thinking, but if the data in BISON is also in GBIF, why bother making another R package for BISON? Good question. As I just said, BISON will have some data GBIF won't have. Also, the services (search API and map service) are different from those of GBIF. -
      -
    • Springer has a metadata API, but it is terrible, soooo...
    • -
• Elsevier, are you kidding? Well, they do have some sort of API service, but it's a pain in the ass.
    • -
    • Wiley, no better than Elsevier.
    • -
    +Check out the package on GitHub here [https://github.com/ropensci/rbison](https://github.com/ropensci/rbison). -

    Note that metadata can live in other places:

    +Here is a quick run through of some things you can do with `rbison`. -
      -
• Another package being developed by David Springate, rpubmed, can get PubMed metadata.
    • -
    • Our wrapper to the Mendeley API, RMendeley, gets article metadata via Mendeley's database.
    • -
    • Our wrapper to the Biodiversity Heritage Library API here gets their metadata.
    • -
    +*************** -

No, you can't get metadata via Google Scholar - they don't allow scraping, and don't expose their data via an API.

+### Install rbison -

    I have discussed this package in a previous blog post, but have since worked on the code a bit, and thought it deserved a new post.

    +{% highlight r %} +# Install rbison from GitHub using devtools, uncomment to install +# install.packages('devtools') library(devtools) install_github('rbison', +# 'ropensci') +library(rbison) +{% endhighlight %} -

    You can see a tutorial for this package here, and contribute to the code here.

    -
    +*************** -

    Install rmetadata

    +### Search the BISON database for, of course, bison + -
    # install_github('rmetadata', 'ropensci') # uncomment to install
    -library(rmetadata)
    +{% highlight r %} +# Do the search +out <- bison(species = "Bison bison", type = "scientific_name", start = 0, count = 10) -
    +# Check that the returned object is the right class ('bison') +class(out) +{% endhighlight %} -

    Count OAI-PMH identifiers for a data provider.

    -
    # For DataCite.
    -count_identifiers("datacite")
     
    -  provider   count
    -1 datacite 1216193
    +{% highlight text %} +[1] "bison" +{% endhighlight %} -
    -

Look up article info via CrossRef with a DOI and get a citation.

    +#### Get a summary of the data -

    As Bibtex

    -
    print(crossref_citation("10.3998/3336451.0009.101"), style = "Bibtex")
    +{% highlight r %}
    +bison_data(out)
    +{% endhighlight %}
     
    -@Article{,
    -  title = {In Google We Trust?},
    -  author = {Geoffrey Bilder},
    -  journal = {The Journal of Electronic Publishing},
    -  year = {2006},
    -  month = {01},
    -  volume = {9},
    -  doi = {10.3998/3336451.0009.101},
    -}
    -

    As regular text

    -
    print(crossref_citation("10.3998/3336451.0009.101"), style = "text")
    +{% highlight text %}
    +  total observation fossil specimen unknown
    +1   761          30      4      709      18
    +{% endhighlight %}
     
    -Bilder G (2006). "In Google We Trust?" _The Journal of Electronic
    -Publishing_, *9*. <URL:
    -http://dx.doi.org/10.3998/3336451.0009.101>.
    -
    +#### Summary by counties (just the first 6 rows) -

Search the CrossRef Metadata for DOIs using free-form references.

    -

    Search with title, author, year, and journal

    +{% highlight r %} +head(bison_data(input = out, datatype = "counties")) +{% endhighlight %} -
    crossref_search_free(query = "Piwowar Sharing Detailed Research Data Is Associated with Increased Citation Rate PLOS one 2007")
     
    -                                                                                             text
    -1 Piwowar Sharing Detailed Research Data Is Associated with Increased Citation Rate PLOS one 2007
    -  match                   doi score
    -1  TRUE 10.1038/npre.2007.361 4.905
    -

Get a DOI and get the citation using crossref_search

    +{% highlight text %} + record_id total county_name state +1 48295 7 Lipscomb Texas +2 41025 15 Harney Oregon +3 49017 8 Garfield Utah +4 35031 2 McKinley New Mexico +5 56013 1 Fremont Wyoming +6 40045 2 Ellis Oklahoma +{% endhighlight %} -
    # Get a DOI for a paper
    -doi <- crossref_search_free(query = "Piwowar sharing data PLOS one")$doi
     
    -# Get the metadata
    -crossref_search(doi = doi)[, 1:3]
    +#### Summary of states
     
    -                           doi score normalizedScore
    -1 10.1371/journal.pone.0000308 18.19             100
    - -
    - -

Get a random set of DOIs through CrossRef.

    - -
    # Default search gets 20 random DOIs
    -crossref_r()
    -
    - [1] "10.4028/www.scientific.net/MSF.126-128.467"
    - [2] "10.2139/ssrn.548523"                       
    - [3] "10.1016/S0012-821X(02)00562-9"             
    - [4] "10.1093/rsq/13.2-3.167"                    
    - [5] "10.5772/55055"                             
    - [6] "10.1515/BC.1999.050"                       
    - [7] "10.1016/S0020-7292(98)90160-6"             
    - [8] "10.1111/j.1439-0418.1985.tb02788.x"        
    - [9] "10.1089/aid.2012.0115"                     
    -[10] "10.1016/0002-9378(95)90155-8"              
    -[11] "10.1001/jama.1949.02900490055028"          
    -[12] "10.1051/jphyscol:1989172"                  
    -[13] "10.1016/s0301-2115(03)00298-7"             
    -[14] "10.1007/BF02735292"                        
    -[15] "10.1016/0003-4916(65)90026-6"              
    -[16] "10.4156/jdcta.vol5.issue5.12"              
    -[17] "10.1007/s10904-009-9316-2"                 
    -[18] "10.1023/A:1021690001832"                   
    -[19] "10.1007/s12262-012-0724-0"                 
    -[20] "10.1007/bf02192860"
    - -
    # Specify you want journal articles only
    -crossref_r(type = "journal_article")
    -
    - [1] "10.1016/j.jacc.2011.09.055"                                 
    - [2] "10.1002/dev.420170603"                                      
    - [3] "10.4315/0362-028X.JFP-10-403"                               
    - [4] "10.1016/S0925-4927(98)00016-X"                              
    - [5] "10.1111/j.1933-1592.2002.tb00141.x"                         
    - [6] "10.1541/ieejfms.127.629"                                    
    - [7] "10.5539/enrr.v3n1p62"                                       
    - [8] "10.1016/S0960-9776(96)90038-7"                              
    - [9] "10.1016/0925-9635(94)05240-9"                               
    -[10] "10.1016/s0929-693x(97)86846-7"                              
    -[11] "10.1002/(SICI)1096-9071(199601)48:1<53::AID-JMV9>3.0.CO;2-K"
    -[12] "10.1016/s0267-7261(01)00016-1"                              
    -[13] "10.1111/j.1748-0361.2003.tb00575.x"                         
    -[14] "10.1097/00005721-197701000-00011"                           
    -[15] "10.1007/s00894-009-0593-z"                                  
    -[16] "10.1071/AR9830063"                                          
    -[17] "10.1186/gb-2009-10-4-r39"                                   
    -[18] "10.2165/00128415-201113540-00038"                           
    -[19] "10.1007/BF00522986"                                         
    -[20] "10.1080/19407963.2011.539385"
    - -
    - -

Search the CrossRef Metadata API.

    - -
    # Search for two different query terms
    -crossref_search(query = c("renear", "palmer"), rows = 4)[, 1:3]
     
    -                            doi score normalizedScore
    -1       10.1126/science.1157784 3.253             100
    -2  10.1002/meet.2009.1450460141 2.169              66
    -3 10.4242/BalisageVol3.Renear01 2.102              64
    -4 10.4242/BalisageVol5.Renear01 2.102              64
    +{% highlight r %} +bison_data(input = out, datatype = "states") +{% endhighlight %} -
    # Get results for a certain year
    -crossref_search(query = c("renear", "palmer"), year = 2010)[, 1:3]
     
    -                                  doi  score normalizedScore
    -1            10.1002/meet.14504701218 1.0509             100
    -2            10.1002/meet.14504701240 1.0509             100
    -3           10.5270/OceanObs09.cwp.68 1.0442              99
    -4               10.1353/mpq.2010.0003 0.6890              65
    -5                  10.1353/mpq.0.0041 0.6890              65
    -6                  10.1353/mpq.0.0044 0.6890              65
    -7                  10.1353/mpq.0.0057 0.6890              65
    -8                    10.1386/fm.1.1.2 0.6890              65
    -9                    10.1386/fm.1.2.2 0.6890              65
    -10                   10.1386/fm.1.3.2 0.6890              65
    -11       10.1097/ALN.0b013e3181f09404 0.6090              57
    -12      10.1016/j.urology.2010.02.033 0.6090              57
    -13              10.1353/ect.2010.0025 0.6090              57
    -14               10.1117/2.4201001.04 0.6090              57
    -15 10.1111/j.1835-9310.1977.tb01159.x 0.6090              57
    -16    10.4067/S0717-69962010000100001 0.6090              57
    -17    10.4067/S0717-69962010000200001 0.6090              57
    -18           10.2105/AJPH.2009.191098 0.6029              57
    -19              10.1353/mpq.2010.0004 0.5167              49
    -20                 10.1353/mpq.0.0048 0.5167              49
    -
    +{% highlight text %} + record_id total county_fips +1 Washington 1 53 +2 Texas 8 48 +3 New Mexico 8 35 +4 Iowa 1 19 +5 Montana 9 30 +6 Wyoming 155 56 +7 Oregon 15 41 +8 Oklahoma 14 40 +9 Kansas 10 20 +10 Arizona 1 04 +11 Alaska 29 02 +12 Utah 16 49 +13 Colorado 17 08 +14 Nebraska 1 31 +15 South Dakota 61 46 +{% endhighlight %} -

    Get a short DOI from shortdoi.org.

    -
    # Geta a short DOI, just the short DOI returned
    -short_doi(doi = "10.1371/journal.pone.0042793")
    +***************
     
    -[1] "10/f2bfz9"
    +### Map the results -
    # Geta a short DOI, all data returned
    -short_doi(doi = "10.1371/journal.pone.0042793", justshort = FALSE)
     
    -$DOI
    -[1] "10.1371/journal.pone.0042793"
    +{% highlight r %}
    +# Search for Ursus americanus (american black bear)
    +out <- bison(species = "Ursus americanus", type = "scientific_name", start = 0, 
    +    count = 200)
     
    -$ShortDOI
    -[1] "10/f2bfz9"
    +# Sweet, got some data
    +bison_data(out)
    +{% endhighlight %}
     
    -$IsNew
    -[1] FALSE
    -
    -

Get a record from an OAI-PMH data provider.

    +{% highlight text %} + total observation fossil specimen literature unknown centroid +1 3792 59 125 3522 47 39 78 +{% endhighlight %} -
    # Single provider, one identifier
    -md_getrecord(provider = "pensoft", identifier = "10.3897/zookeys.1.10")
     
    -                                                                                                title
    -1 A new candidate for a Gondwanaland distribution in the Zodariidae (Araneae): Australutica in Africa
    -      creator date             type
    -1 Jocqué,Rudy 2008 Research Article
    +### Make some maps! Note that right now the county and state maps just plot the conterminous lower 48. The map of individual occurrences shows the lower 48 + Alaska -
    # Single provider, multiple identifiers
    -md_getrecord(provider = "pensoft", identifier = c("10.3897/zookeys.1.10", "10.3897/zookeys.4.57"))
     
    -                                                                                                   title
    -1    A new candidate for a Gondwanaland distribution in the Zodariidae (Araneae): Australutica in Africa
    -2 Studies of Tiger Beetles. CLXXVIII. A new Lophyra (Lophyra) from Somaliland (Coleoptera, Cicindelidae)
    -        creator date             type
    -1   Jocqué,Rudy 2008 Research Article
    -2 Cassola,Fabio 2008 Research Article
+{% highlight r %}
+# By county
+bisonmap(out, tomap = "county")
+{% endhighlight %}
-
+![center](/public/img/2013-05-25-rbison/map11.png)
-

    List available metadata formats from various providers.

    +{% highlight r %} -
    # List metadata formats for a provider
    -md_listmetadataformats(provider = "dryad")
    +# By state
    +bisonmap(out, tomap = "state")
    +{% endhighlight %}
     
    -  metadataPrefix
    -1         oai_dc
    -2            rdf
    -3            ore
    -4           mets
    -                                                       schema
    -1              http://www.openarchives.org/OAI/2.0/oai_dc.xsd
    -2                 http://www.openarchives.org/OAI/2.0/rdf.xsd
    -3 http://tweety.lanl.gov/public/schemas/2008-06/atom-tron.sch
    -4                  http://www.loc.gov/standards/mets/mets.xsd
    -                            metadataNamespace
    -1 http://www.openarchives.org/OAI/2.0/oai_dc/
    -2    http://www.openarchives.org/OAI/2.0/rdf/
    -3                 http://www.w3.org/2005/Atom
    -4                    http://www.loc.gov/METS/
    +![center](/public/img/2013-05-25-rbison/map12.png) -
    # List metadata formats for a specific identifier for a provider
    -md_listmetadataformats(provider = "pensoft", identifier = "10.3897/zookeys.1.10")
    +{% highlight r %}
    +
    +# Individual locations
    +bisonmap(out)
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +## Rendering map...plotting 199 points
    +{% endhighlight %}
    +
    +![center](/public/img/2013-05-25-rbison/map13.png) 
    +
    +
    +*********
    +	
    +### When plotting occurrences, you can pass additional arguments into the `bisonmap` function.
    +
    +#### For example, you can jitter the points
    +
    +
    +{% highlight r %}
    +bisonmap(input = out, geom = geom_jitter)
    +{% endhighlight %}
     
    -            identifier metadataPrefix
    -1 10.3897/zookeys.1.10         oai_dc
    -2 10.3897/zookeys.1.10           mods
    -                                             schema
    -1    http://www.openarchives.org/OAI/2.0/oai_dc.xsd
    -2 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd
    -                            metadataNamespace
    -1 http://www.openarchives.org/OAI/2.0/oai_dc/
    -2                  http://www.loc.gov/mods/v3
    -
    -

    Some plotting - mean number of authors per paper

+{% highlight text %}
+## Rendering map...plotting 199 points
+{% endhighlight %}
-

    Okay, so this isn't a super useful visualization, but you can surely think of something better.

    +![center](/public/img/2013-05-25-rbison/map2.png) -
    library(ggplot2)
    -library(ggthemes)
    -library(reshape)
     
    +#### And you can specify by how much you want the points to jitter (here an extreme example to make it obvious)
     
    -temp <- md_listrecords(provider = "pensoft", from = "2011-10-01", until = "2012-01-01")
    -temp2 <- ldply(temp)[, -1]
    -auths <- sapply(temp2$creator, function(x) length(strsplit(as.character(x), 
    -    ";")[[1]]))
    -toplot <- data.frame(authors = auths, articletype = temp2$type)
    -toplot_ <- ddply(toplot, .(articletype), summarise, authors = mean(authors))
    -toplot_$articletype <- reorder(toplot_$articletype, toplot_$authors)
     
    -ggplot(toplot_, aes(articletype, authors)) + theme_tufte(base_size = 16) + geom_bar(stat = "identity") + 
    -    coord_flip()
+{% highlight r %}
+library(ggplot2)
+bisonmap(input = out, geom = geom_jitter, jitter = position_jitter(width = 5))
+{% endhighlight %}
-

    center

    -
    -

    Get the .Rmd file used to create this post at my github account - or .md file.

+{% highlight text %}
+## Rendering map...plotting 199 points
+{% endhighlight %}
-

    Written in Markdown, with help from knitr, and knitcitations.

+![center](/public/img/2013-05-25-rbison/map3.png)
+
+
+*********
+
+#### Let us know if you have any feature requests or find bugs at [our GitHub Issues page](https://github.com/ropensci/rbison/issues).

    - - Visualizing rOpenSci collaboration + + Scholarly metadata in R

-
+
+
+Scholarly metadata - the meta-information surrounding articles - can be super useful. Although metadata does not contain the full content of articles, it contains a lot of useful information, including title, authors, abstract, URL to the article, etc.
+
+One of the largest sources of metadata is provided via the Open Archives Initiative Protocol for Metadata Harvesting, or [OAI-PMH](http://www.openarchives.org/OAI/openarchivesprotocol.html). Many publishers provide their metadata through their own endpoint, and implement the standard OAI-PMH methods: [GetRecord](http://www.openarchives.org/OAI/openarchivesprotocol.html#GetRecord), [Identify](http://www.openarchives.org/OAI/openarchivesprotocol.html#Identify), [ListIdentifiers](http://www.openarchives.org/OAI/openarchivesprotocol.html#ListIdentifiers), [ListMetadataFormats](http://www.openarchives.org/OAI/openarchivesprotocol.html#ListMetadataFormats), [ListRecords](http://www.openarchives.org/OAI/openarchivesprotocol.html#ListRecords), and [ListSets](http://www.openarchives.org/OAI/openarchivesprotocol.html#ListSets). Many providers use OAI-PMH, including [DataCite](http://oai.datacite.org/), [Dryad](http://wiki.datadryad.org/Data_Access#OAI-PMH), and [PubMed](http://www.ncbi.nlm.nih.gov/pmc/tools/oai/).
+
+Some data-/article-providers provide their metadata via their own APIs. For example, Nature Publishing Group provides their own metadata API [here](http://developers.nature.com/docs) in a non-OAI-PMH format; you can get PLoS metadata through their [search API](http://api.plos.org/), and the BHL (see below) provides their own custom metadata service.
-
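Because OAI-PMH is just HTTP plus XML, you can poke at a provider without any wrapper package. Here is a minimal sketch (an editorial aside, not part of the post's package code; it assumes DataCite's OAI base URL is `http://oai.datacite.org/oai` and that the `httr` and `XML` packages are installed):

{% highlight r %}
# Hedged sketch: issue a raw OAI-PMH "Identify" request and read the
# repository name out of the XML response. The base URL is an assumption.
library(httr)
library(XML)

resp <- GET("http://oai.datacite.org/oai", query = list(verb = "Identify"))
doc <- xmlParse(content(resp, as = "text"), asText = TRUE)
xmlValue(getNodeSet(doc, "//*[local-name()='repositoryName']")[[1]])
{% endhighlight %}

The `md_*` functions shown further down wrap this kind of request behind friendlier R functions.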

    We (rOpenSci) have been writing code for R packages for a couple years, so it is time to take a look back at the data. What data you ask? The commits data from GitHub ~ data that records who did what and when.

    +In addition, CrossRef provides a number of metadata search services: [metadata search](http://search.labs.crossref.org/help/api) and [openurl](http://labs.crossref.org/openurl/). -

Using the GitHub commits API we can gather data on who committed code to a GitHub repository, and when they did it. Then we can visualize this historical record.

    +What about the other publishers? (please tell me if I'm wrong about these three) -
++ Springer has [a metadata API](http://dev.springer.com/docs), but it is terrible, soooo...
++ Elsevier, are you kidding? Well, they do have some sort of API service, but it's a pain in the ass.
++ Wiley, no better than Elsevier.
-

    Install some functions for interacting with the Github API via R

    +Note that metadata can live in other places: -
    install_github('sandbox', 'ropensci') 
    ++ Another package being developed by David Springate, [rpubmed](https://github.com/ropensci/rpubmed) can get PubMed metadata. 
    ++ Our wrapper to the Mendeley API, [RMendeley](https://github.com/ropensci/rmendeley), gets article metadata via Mendeley's database. 
    ++ Our wrapper to the Biodiversity Heritage Library API [here](http://www.biodiversitylibrary.org/api2/docs/docs.html) gets their metadata. 
     
    -library(sandbox)
    -library(httr)
    -library(ggplot2)
    -library(scales)
    -library(reshape2)
    -library(bipartite)
    -library(doMC)
    -library(plyr)
    -library(ggthemes)
    -library(picante)
+No, you can't get metadata via Google Scholar - they don't allow scraping, and don't expose their data via an API.
     
    -# And authenticate - pops open a page in your default browser, then tells 
    -# you authentication was successful
    -github_auth()
    +I have discussed this package [in a previous blog post](http://sckott.github.io/2012/09/rmetadata/), but have since worked on the code a bit, and thought it deserved a new post. -
    +You can see a tutorial for this package [here](http://ropensci.github.com/rmetadata/), and contribute to the code [here](https://github.com/ropensci/rmetadata). -

    Get all repos for an organization, here ropensci of course

    +*************** -
    ropensci_repos <- github_allrepos(userorg = "ropensci")
    +### Install rmetadata -
    +{% highlight r %} +# install_github('rmetadata', 'ropensci') # uncomment to install +library(rmetadata) +{% endhighlight %} + + +*************** + +### Count OAI-PMH identifiers for a data provider. + + +{% highlight r %} +# For DataCite. +count_identifiers("datacite") + + provider count +1 datacite 1216193 +{% endhighlight %} + + +********* + +### Lookup article info via CrossRef with DOI and get a citation. + +#### As Bibtex + + +{% highlight r %} +print(crossref_citation("10.3998/3336451.0009.101"), style = "Bibtex") + +@Article{, + title = {In Google We Trust?}, + author = {Geoffrey Bilder}, + journal = {The Journal of Electronic Publishing}, + year = {2006}, + month = {01}, + volume = {9}, + doi = {10.3998/3336451.0009.101}, +} +{% endhighlight %} + + +#### As regular text + + +{% highlight r %} +print(crossref_citation("10.3998/3336451.0009.101"), style = "text") + +Bilder G (2006). "In Google We Trust?" _The Journal of Electronic +Publishing_, *9*. . +{% endhighlight %} + + +********* + +### Search the CrossRef Metatdata for DOIs using free form references. + +#### Search with title, author, year, and journal + + +{% highlight r %} +crossref_search_free(query = "Piwowar Sharing Detailed Research Data Is Associated with Increased Citation Rate PLOS one 2007") + + text +1 Piwowar Sharing Detailed Research Data Is Associated with Increased Citation Rate PLOS one 2007 + match doi score +1 TRUE 10.1038/npre.2007.361 4.905 +{% endhighlight %} -

Get commits broken down into additions and deletions, though below we just collapse them into all commits

    + +#### Get a DOI and get the citation using \code{crossref_search} + -
    registerDoMC(cores = 4)
    -github_commits_safe <- plyr::failwith(NULL, github_commits)
    -out <- llply(ropensci_repos, function(x) github_commits_safe("ropensci", x, 
    -    since = "2009-01-01T", limit = 500), .parallel = TRUE)
    -names(out) <- ropensci_repos
    -out2 <- compact(out)
    -outdf <- ldply(out2)
    +{% highlight r %} +# Get a DOI for a paper +doi <- crossref_search_free(query = "Piwowar sharing data PLOS one")$doi + +# Get the metadata +crossref_search(doi = doi)[, 1:3] + + doi score normalizedScore +1 10.1371/journal.pone.0000308 18.19 100 +{% endhighlight %} + + +********* + +### Get a random set of DOI's through CrossRef. + + +{% highlight r %} +# Default search gets 20 random DOIs +crossref_r() + + [1] "10.4028/www.scientific.net/MSF.126-128.467" + [2] "10.2139/ssrn.548523" + [3] "10.1016/S0012-821X(02)00562-9" + [4] "10.1093/rsq/13.2-3.167" + [5] "10.5772/55055" + [6] "10.1515/BC.1999.050" + [7] "10.1016/S0020-7292(98)90160-6" + [8] "10.1111/j.1439-0418.1985.tb02788.x" + [9] "10.1089/aid.2012.0115" +[10] "10.1016/0002-9378(95)90155-8" +[11] "10.1001/jama.1949.02900490055028" +[12] "10.1051/jphyscol:1989172" +[13] "10.1016/s0301-2115(03)00298-7" +[14] "10.1007/BF02735292" +[15] "10.1016/0003-4916(65)90026-6" +[16] "10.4156/jdcta.vol5.issue5.12" +[17] "10.1007/s10904-009-9316-2" +[18] "10.1023/A:1021690001832" +[19] "10.1007/s12262-012-0724-0" +[20] "10.1007/bf02192860" +{% endhighlight %} + + + +{% highlight r %} + +# Specify you want journal articles only +crossref_r(type = "journal_article") + + [1] "10.1016/j.jacc.2011.09.055" + [2] "10.1002/dev.420170603" + [3] "10.4315/0362-028X.JFP-10-403" + [4] "10.1016/S0925-4927(98)00016-X" + [5] "10.1111/j.1933-1592.2002.tb00141.x" + [6] "10.1541/ieejfms.127.629" + [7] "10.5539/enrr.v3n1p62" + [8] "10.1016/S0960-9776(96)90038-7" + [9] "10.1016/0925-9635(94)05240-9" +[10] "10.1016/s0929-693x(97)86846-7" +[11] "10.1002/(SICI)1096-9071(199601)48:1<53::AID-JMV9>3.0.CO;2-K" +[12] "10.1016/s0267-7261(01)00016-1" +[13] "10.1111/j.1748-0361.2003.tb00575.x" +[14] "10.1097/00005721-197701000-00011" +[15] "10.1007/s00894-009-0593-z" +[16] "10.1071/AR9830063" +[17] "10.1186/gb-2009-10-4-r39" +[18] "10.2165/00128415-201113540-00038" +[19] "10.1007/BF00522986" +[20] "10.1080/19407963.2011.539385" +{% endhighlight %} + + +********* + +### Search the CrossRef Metatdata API. + + +{% highlight r %} +# Search for two different query terms +crossref_search(query = c("renear", "palmer"), rows = 4)[, 1:3] + + doi score normalizedScore +1 10.1126/science.1157784 3.253 100 +2 10.1002/meet.2009.1450460141 2.169 66 +3 10.4242/BalisageVol3.Renear01 2.102 64 +4 10.4242/BalisageVol5.Renear01 2.102 64 +{% endhighlight %} + + + +{% highlight r %} + +# Get results for a certain year +crossref_search(query = c("renear", "palmer"), year = 2010)[, 1:3] + + doi score normalizedScore +1 10.1002/meet.14504701218 1.0509 100 +2 10.1002/meet.14504701240 1.0509 100 +3 10.5270/OceanObs09.cwp.68 1.0442 99 +4 10.1353/mpq.2010.0003 0.6890 65 +5 10.1353/mpq.0.0041 0.6890 65 +6 10.1353/mpq.0.0044 0.6890 65 +7 10.1353/mpq.0.0057 0.6890 65 +8 10.1386/fm.1.1.2 0.6890 65 +9 10.1386/fm.1.2.2 0.6890 65 +10 10.1386/fm.1.3.2 0.6890 65 +11 10.1097/ALN.0b013e3181f09404 0.6090 57 +12 10.1016/j.urology.2010.02.033 0.6090 57 +13 10.1353/ect.2010.0025 0.6090 57 +14 10.1117/2.4201001.04 0.6090 57 +15 10.1111/j.1835-9310.1977.tb01159.x 0.6090 57 +16 10.4067/S0717-69962010000100001 0.6090 57 +17 10.4067/S0717-69962010000200001 0.6090 57 +18 10.2105/AJPH.2009.191098 0.6029 57 +19 10.1353/mpq.2010.0004 0.5167 49 +20 10.1353/mpq.0.0048 0.5167 49 +{% endhighlight %} + + +********* + +### Get a short DOI from shortdoi.org. 
+ + +{% highlight r %} +# Geta a short DOI, just the short DOI returned +short_doi(doi = "10.1371/journal.pone.0042793") + +[1] "10/f2bfz9" +{% endhighlight %} + + + +{% highlight r %} + +# Geta a short DOI, all data returned +short_doi(doi = "10.1371/journal.pone.0042793", justshort = FALSE) + +$DOI +[1] "10.1371/journal.pone.0042793" + +$ShortDOI +[1] "10/f2bfz9" + +$IsNew +[1] FALSE +{% endhighlight %} + + +********* + +### Get a record from a OAI-PMH data provider. + + +{% highlight r %} +# Single provider, one identifier +md_getrecord(provider = "pensoft", identifier = "10.3897/zookeys.1.10") + + title +1 A new candidate for a Gondwanaland distribution in the Zodariidae (Araneae): Australutica in Africa + creator date type +1 Jocqué,Rudy 2008 Research Article +{% endhighlight %} + + + +{% highlight r %} + +# Single provider, multiple identifiers +md_getrecord(provider = "pensoft", identifier = c("10.3897/zookeys.1.10", "10.3897/zookeys.4.57")) + + title +1 A new candidate for a Gondwanaland distribution in the Zodariidae (Araneae): Australutica in Africa +2 Studies of Tiger Beetles. CLXXVIII. A new Lophyra (Lophyra) from Somaliland (Coleoptera, Cicindelidae) + creator date type +1 Jocqué,Rudy 2008 Research Article +2 Cassola,Fabio 2008 Research Article +{% endhighlight %} + + +********* + +### List available metadata formats from various providers. + + +{% highlight r %} +# List metadata formats for a provider +md_listmetadataformats(provider = "dryad") + + metadataPrefix +1 oai_dc +2 rdf +3 ore +4 mets + schema +1 http://www.openarchives.org/OAI/2.0/oai_dc.xsd +2 http://www.openarchives.org/OAI/2.0/rdf.xsd +3 http://tweety.lanl.gov/public/schemas/2008-06/atom-tron.sch +4 http://www.loc.gov/standards/mets/mets.xsd + metadataNamespace +1 http://www.openarchives.org/OAI/2.0/oai_dc/ +2 http://www.openarchives.org/OAI/2.0/rdf/ +3 http://www.w3.org/2005/Atom +4 http://www.loc.gov/METS/ +{% endhighlight %} + + + +{% highlight r %} + +# List metadata formats for a specific identifier for a provider +md_listmetadataformats(provider = "pensoft", identifier = "10.3897/zookeys.1.10") + + identifier metadataPrefix +1 10.3897/zookeys.1.10 oai_dc +2 10.3897/zookeys.1.10 mods + schema +1 http://www.openarchives.org/OAI/2.0/oai_dc.xsd +2 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd + metadataNamespace +1 http://www.openarchives.org/OAI/2.0/oai_dc/ +2 http://www.loc.gov/mods/v3 +{% endhighlight %} -
    -

    Plot commits by date and repo

    +********* -
    outdf_subset <- outdf[!outdf$.id %in% c("citeulike", "challenge", "docs", "ropensci-book", 
    -    "usecases", "textmine", "usgs", "ropenscitoolkit", "neotoma", "rEWDB", "rgauges", 
    -    "rodash", "ropensci.github.com", "ROAuth"), ]
    -outdf_subset$.id <- tolower(outdf_subset$.id)
    -outdf_subset <- ddply(outdf_subset, .(.id, date), summarise, value = sum(value))
    +### Some plotting - mean number of authors per paper
     
    -mindates <- llply(unique(outdf_subset$.id), function(x) min(outdf_subset[outdf_subset$.id == 
    -    x, "date"]))
    -names(mindates) <- unique(outdf_subset$.id)
    -mindates <- sort(do.call(c, mindates))
    -outdf_subset$.id <- factor(outdf_subset$.id, levels = names(mindates))
    +Okay, so this isn't a super useful visualization, but you can surely think of something better. 
     
    -ggplot(outdf_subset, aes(date, value, fill = .id)) + 
    -    geom_bar(stat = "identity", width = 0.5) + 
    -    geom_rangeframe(sides = "b", colour = "grey") + 
    -    theme_bw(base_size = 9) + 
    -    scale_x_date(labels = date_format("%Y"), breaks = date_breaks("year")) + 
    -    scale_y_log10() + 
    -    facet_grid(.id ~ .) + 
    -    labs(x = "", y = "") + 
    -    theme(axis.text.y = element_blank(), 
    -        axis.text.x = element_text(colour = "black"), 
    -        axis.ticks.y = element_blank(), 
    -        strip.text.y = element_text(angle = 0, size = 8, ), 
    -        strip.background = element_rect(size = 0), 
    -        panel.grid.major = element_blank(), 
    -        panel.grid.minor = element_blank(), 
    -        legend.text = element_text(size = 8), 
    -        legend.position = "none", 
    -        panel.border = element_blank())
    -

    center

+{% highlight r %}
+library(ggplot2)
+library(ggthemes)
+library(reshape)
-

The plot above plots the sum of additions+deletions, and is sorted by the first commit date of each repo, with the first being treebase, which wraps the Treebase API, and the most recent being rwbclimate, which wraps the World Bank climate data API.

    -

You can see that some repos have received commits more or less consistently over their lifetime, while others have seen a little development here and there.

    +temp <- md_listrecords(provider = "pensoft", from = "2011-10-01", until = "2012-01-01") +temp2 <- ldply(temp)[, -1] +auths <- sapply(temp2$creator, function(x) length(strsplit(as.character(x), + ";")[[1]])) +toplot <- data.frame(authors = auths, articletype = temp2$type) +toplot_ <- ddply(toplot, .(articletype), summarise, authors = mean(authors)) +toplot_$articletype <- reorder(toplot_$articletype, toplot_$authors) -
    +ggplot(toplot_, aes(articletype, authors)) + theme_tufte(base_size = 16) + geom_bar(stat = "identity") + + coord_flip() +{% endhighlight %} -


    +![center](/public/img/someplotting.png) -

In addition, there are quite a few people that have committed code now to rOpenSci repos, calling for a network visualization of course.

    +*************** -
    outdf_network <- droplevels(outdf[!outdf$.id %in% c("citeulike", "challenge", 
    -    "docs", "ropensci-book", "usecases", "textmine", "usgs", "ropenscitoolkit", 
    -    "retriever", "rodash", "ropensci.github.com", "ROAuth", "rgauges", "sandbox", 
    -    "rfna", "rmetadata", "rhindawi", "rpmc", "rpensoft", "ritis"), ])
    -casted <- dcast(outdf_network, .id + date + name ~ variable, fun.aggregate = length, 
    -    value.var = "value")
    -names(casted)[1] <- "repo"
    -casted2 <- ddply(casted, .(repo, name), summarise, commits = sum(additions))
    -casted2 <- data.frame(repo = casted2$repo, weight = casted2$commits, name = casted2$name)
    -mat <- sample2matrix(casted2)
    -plotweb(sortweb(mat, sort.order = "dec"), method = "normal", text.rot = 90, 
    -    adj.high = c(-0.3, 0), adj.low = c(1, -0.3), y.width.low = 0.05, y.width.high = 0.05, 
    -    ybig = 0.09, labsize = 0.7)
    +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2013-03-16-r-metadata.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2013-03-16-r-metadata.md). -

    center

    - -

The plot above shows repos on one side and contributors on the other. Some folks (the core rOpenSci team: cboettig, karthikram, emhart, and schamberlain) have committed quite a lot to many packages. We also have many awesome contributors to our packages (some contributors and repos have been removed for clarity).

    - -

rOpenSci is truly a collaborative effort to develop tools for open science, so thanks to all our contributors - keep on forking, pull requesting, and committing.

    +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and [knitcitations](https://github.com/cboettig/knitcitations).
    diff --git a/_site/page23/index.html b/_site/page23/index.html index 407a45e6c6..44c56292c2 100644 --- a/_site/page23/index.html +++ b/_site/page23/index.html @@ -59,6 +59,142 @@

    Recology

      +
    +

    + + Visualizing rOpenSci collaboration + +

    + + + + We ([rOpenSci](http://ropensci.org/)) have been writing code for R packages for a couple years, so it is time to take a look back at the data. What data you ask? The commits data from GitHub ~ data that records who did what and when. + +Using the [Github commits API](http://developer.github.com/v3/repos/commits/) we can gather data on who commited code to a Github repository, and when they did it. Then we can visualize this hitorical record. + +*************** + +### Install some functions for interacting with the Github API via R + +{% highlight r %} + +install_github('sandbox', 'ropensci') + +library(sandbox) +library(httr) +library(ggplot2) +library(scales) +library(reshape2) +library(bipartite) +library(doMC) +library(plyr) +library(ggthemes) +library(picante) + +# And authenticate - pops open a page in your default browser, then tells +# you authentication was successful +github_auth() +{% endhighlight %} + + +*************** + +### Get all repos for an organization, here ropensci of course + +{% highlight r %} +ropensci_repos <- github_allrepos(userorg = "ropensci") +{% endhighlight %} + + +*************** + +### Get commits broken down in to additions and deletions, though below we just collapse them to all commits + +{% highlight r %} +registerDoMC(cores = 4) +github_commits_safe <- plyr::failwith(NULL, github_commits) +out <- llply(ropensci_repos, function(x) github_commits_safe("ropensci", x, + since = "2009-01-01T", limit = 500), .parallel = TRUE) +names(out) <- ropensci_repos +out2 <- compact(out) +outdf <- ldply(out2) +{% endhighlight %} + + +*************** + +### Plot commits by date and repo + +{% highlight r %} +outdf_subset <- outdf[!outdf$.id %in% c("citeulike", "challenge", "docs", "ropensci-book", + "usecases", "textmine", "usgs", "ropenscitoolkit", "neotoma", "rEWDB", "rgauges", + "rodash", "ropensci.github.com", "ROAuth"), ] +outdf_subset$.id <- tolower(outdf_subset$.id) +outdf_subset <- ddply(outdf_subset, .(.id, date), summarise, value = sum(value)) + +mindates <- llply(unique(outdf_subset$.id), function(x) min(outdf_subset[outdf_subset$.id == + x, "date"])) +names(mindates) <- unique(outdf_subset$.id) +mindates <- sort(do.call(c, mindates)) +outdf_subset$.id <- factor(outdf_subset$.id, levels = names(mindates)) + +ggplot(outdf_subset, aes(date, value, fill = .id)) + + geom_bar(stat = "identity", width = 0.5) + + geom_rangeframe(sides = "b", colour = "grey") + + theme_bw(base_size = 9) + + scale_x_date(labels = date_format("%Y"), breaks = date_breaks("year")) + + scale_y_log10() + + facet_grid(.id ~ .) + + labs(x = "", y = "") + + theme(axis.text.y = element_blank(), + axis.text.x = element_text(colour = "black"), + axis.ticks.y = element_blank(), + strip.text.y = element_text(angle = 0, size = 8, ), + strip.background = element_rect(size = 0), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + legend.text = element_text(size = 8), + legend.position = "none", + panel.border = element_blank()) +{% endhighlight %} + + +![center](/public/img/commitsbydate.png) + + +The plot above plots the sum of additions+deletions, and is sorted by the first commit date of reach repo, with the first being [treebase](https://github.com/ropensci/treeBASE), which wraps the [Treebase API](http://treebase.org/treebase-web/urlAPI.html), and the most recent being [rwbclimate](https://github.com/ropensci/rWBclimate), which wraps the [World Blank climate data API](http://data.worldbank.org/developers/climate-data-api). 
+ +You can see that some repos have recieved commits more or less consistently over their life time, while others have seen a little development here and there. + +*************** +w +### In addition, there are quite a few people that have committed code now to rOpenSci repos, calling for a network vizualization of course. + +{% highlight r %} +outdf_network <- droplevels(outdf[!outdf$.id %in% c("citeulike", "challenge", + "docs", "ropensci-book", "usecases", "textmine", "usgs", "ropenscitoolkit", + "retriever", "rodash", "ropensci.github.com", "ROAuth", "rgauges", "sandbox", + "rfna", "rmetadata", "rhindawi", "rpmc", "rpensoft", "ritis"), ]) +casted <- dcast(outdf_network, .id + date + name ~ variable, fun.aggregate = length, + value.var = "value") +names(casted)[1] <- "repo" +casted2 <- ddply(casted, .(repo, name), summarise, commits = sum(additions)) +casted2 <- data.frame(repo = casted2$repo, weight = casted2$commits, name = casted2$name) +mat <- sample2matrix(casted2) +plotweb(sortweb(mat, sort.order = "dec"), method = "normal", text.rot = 90, + adj.high = c(-0.3, 0), adj.low = c(1, -0.3), y.width.low = 0.05, y.width.high = 0.05, + ybig = 0.09, labsize = 0.7) +{% endhighlight %} + +![center](/public/img/collabnetwork.png) + + +The plot above shows repos on one side and contributors on the other. Some folks (the core rOpenSci team: cboettig, karthikram, emhart, and schamberlain) have committed quite a lot to many packages. We also have amny awesome contributors to our packages (some contributors and repos have been removed for clarity). + +rOpenSci is truly a collaborative effort to develop tools for open science, so thanks to all our contributors - keep on forking, pull requesting, and commiting. + +
    +

    @@ -68,48 +204,48 @@

    -

    Reboot

    + ## Reboot -

    We need to reboot academia, at least for graduate training. I am speaking from the point of view of ecology/evolution (EEB). Why you ask? Because of the following line of reasoning:

    +We need to reboot academia, at least for graduate training. I am speaking from the point of view of ecology/evolution (EEB). Why you ask? Because of the following line of reasoning: -
      -
    • First, the most important factor for me comes down to supply and demand. We have too much supply (=graduate students) and not enough demand (=faculty positions, etc.) - see this comic at PhDComics for proof. This seems especially apparent when you hear from your fellow postdoc friends that there were hundreds of other people with Ph.D.'s applying for the same position.

    • -
    • Second, funding is getting thin. I have never received funding from a competitive grant, despite having 12 published papers to my name. Recent cuts to the NSF, NIH, and other federal agencies mean that getting a grant will be harder and harder. Furthermore, the mean age of a first time NIH grant recipient in 2008 was 51 according to a recent study in PLoS One (Matthews et. al. 2011).

    • -
• Third, we don't learn the skills we really need. This is manifold. First, we don't learn the appropriate mathematical and statistical techniques in undergraduate and grad school - a forthcoming paper found that in a survey of nearly 1000 ecology and evolution graduate students, most thought they were unprepared with respect to math and stats (interview with author in Soundcloud widget below). Second, we don't learn enough computational skills. Digital data (not on your physical clipboard, but your digital one) is more and more important, requiring knowing how to leverage and keep track of data. Yet, we aren't taught these skills, at least in my experience. The need for training in computation/coding is evident from the sold-out Software Carpentry workshops. Third, reproducibility is not something we are taught. Well, we are taught to check over everything in detail (read: proof your data), but there is often no way to reproduce analyses when we use 10 different expensive software programs to do an analysis (read: MS Word, JMP, SAS, SigmaPlot, etc.). And isn't reproducibility important?

    • -
    ++ First, the most important factor for me comes down to supply and demand. We have too much supply (=graduate students) and not enough demand (=faculty positions, etc.) - see [this comic at PhDComics](http://www.phdcomics.com/comics/archive.php?comicid=911) for proof. This seems especially apparent when you hear from your fellow postdoc friends that there were hundreds of other people with Ph.D.'s applying for the same position. -
    ++ Second, funding is getting thin. I have never received funding from a competitive grant, despite having 12 published papers to my name. Recent cuts to the NSF, NIH, and other federal agencies mean that getting a grant will be harder and harder. Furthermore, the mean age of a first time NIH grant recipient in 2008 was 51 according to a recent study in PLoS One (Matthews _et. al._ 2011). - ++ Third, we don't learn the skills we really need. This is many fold. First, we don't learn the appropriate mathematical and statistical techniques in undergraduate and grad school - a forthcoming paper found that in a survey of nearly 1000 ecology and evolution graduate students, most thought they were unprepared wrt to math and stats (interview with author in Soundcloud widget below). Second, we don't learn enough computational skills. Digital data (not on your physical clipboard, but your digital one) is more and more important, requiring knowing how to leverage and keep track of data. Yet, we aren't taught these skills, at least in my experience. The need for training in computation/coding is evident from the sold out [Software Carpentry workshops](http://software-carpentry.org/). Third, reproducibility is not something we are taught. Well, we are taught to check over everything in detail (read: proof your data), but there is often no way to reproduce analyses when we use 10 different expensive software programs to do an analysis (read: MS Word, JMP, SAS, SigmaPlot, etc.). And isn't reproduciblity important? + +*************** -
    + -

    What do we do?

    +*************** -

To address the supply/demand issue, I think we need fewer graduate students, period. I think this will work for a few reasons. If there are fewer graduate students, those that get in will be of higher quality because profs can be more selective, they may get paid more (hopefully) since there are fewer students, and they should in theory get more attention from their advisers (if they want it). In addition, there would be less competition for the very few grants out there for grad students. This would then lead to fewer postdocs, and less competition for faculty positions. I think the supply/demand issue in EEB is particularly problematic. That is, in EEB there doesn't seem to be the large quantity of private sector jobs as there are for Ph.D. graduates in engineering, physics, etc.

    +## What do we do? -

    The funding situation is beyond me, but definitely makes me want to leave academia. Crowdfunding, especially #SciFund, is an option for scientists, but mostly only on a small financial scale. Any thoughts?

    +To address the supply/demand issue, I think we need fewer graduate students, period. I think this will work for a few reasons. If there are fewer graduate students, those that get in will be of higher quality because profs can be more selective, they may get payed more (hopefully) since there are few students, and they should in theory get more attention from their advisers (if they want it). In addition, there would be less competition for the very few grants out there for grad students. This would then lead to fewer postdocs, and less competition for faculty positions. I think the supply/demand issue in EEB is particularly problematic. That is, in EEB there doesn't seem to be the large quantity of private sector jobs as there is for Ph.D. graduates in engineering, physics, etc. -

    The skills issue will likely be addressed in time, and vary among schools for sure. Some schools will focus on natural history, which is good (that's where I did my undergrad and it was great), and some schools will incorporate more of these science 2.0 skills (advanced stats, better math training, and computer science).

    +The funding situation is beyond me, but definitely makes me want to leave academia. Crowdfunding, especially [#SciFund](http://scifundchallenge.org/), is an option for scientists, but mostly only on a small financial scale. Any thoughts? -
    +The skills issue will likely be addressed in time, and vary among schools for sure. Some schools will focus on natural history, which is good (that's where I did my undergrad and it was great), and some schools will incorporate more of these science 2.0 skills (advanced stats, better math training, and computer science). -

    Thoughts?

    +*************** -
    +## _Thoughts?_ -

    Get the .Rmd file used to create this post at my github account - or .md file.

    +*************** -

    Written in Markdown, with help from knitr, and knitcitations.

    +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2013-02-22-academia-reboot.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2013-02-22-academia-reboot.md). -
    +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and [knitcitations](https://github.com/cboettig/knitcitations). -

    References

    +*************** -

    Matthews KRW, Calhoun KM, lo N, ho V and Germano G (2011). +#### References +

Matthews KRW, Calhoun KM, Lo N, Ho V and Germano G (2011). “The Aging of Biomedical Research in the United States.” PLoS ONE, 6. -http://dx.doi.org/10.1371/journal.pone.0029738.

    +http://dx.doi.org/10.1371/journal.pone.0029738. +
    @@ -122,83 +258,68 @@

    -

    I was just at the Phylotastic hackathon in Tucson, AZ at the iPlant facilities at the UofA.

    - -

A problem that needs to be solved is getting the increasingly vast phylogenetic information to folks not comfortable building their own phylogenies. Phylomatic has made this super easy for people that want plant phylogenies (at least 250 or so papers have used and cited Phylomatic in their papers) - however, there are few options for those that want phylogenies for other taxonomic groups.

    - -

    One cool tool that was brought up was the Common Tree service provided by NCBI. Here's some help on the service. Unlike Phylomatic, Common Tree is purely based off of taxonomic relationships (A and B are both in the C family, so are sisters), not an actual phylogeny as Phylomatic is based on.

    - -

    But how do you use Common Tree?

    - -
    - -

    Get a species list

    - -

    Grab the taxon list from my github account here

    + I was just at the [Phylotastic hackathon](http://www.evoio.org/wiki/Phylotastic) in Tucson, AZ at the [iPlant](http://www.iplantcollaborative.org/) facilities at the UofA. -

    Go to the site

+A problem that needs to be solved is getting the increasingly vast phylogenetic information to folks not comfortable building their own phylogenies. [Phylomatic](http://phylodiversity.net/phylomatic/) has made this super easy for people that want plant phylogenies (at least 250 or so papers have used and cited Phylomatic in their papers) - however, there are few options for those that want phylogenies for other taxonomic groups. -

    Go to the Common Tree site here

    +One cool tool that was brought up was the [Common Tree](http://www.ncbi.nlm.nih.gov/Taxonomy/CommonTree/wwwcmt.cgi) service provided by NCBI. [Here's](http://www.ncbi.nlm.nih.gov/Taxonomy/CommonTree/cmthelp.html) some help on the service. Unlike Phylomatic, Common Tree is purely based off of taxonomic relationships (A and B are both in the C family, so are sisters), not an actual phylogeny as Phylomatic is based on. -

    Choose file

    +But how do you use Common Tree? -

    Hit the "choose file" button, then select the species.txt file you downloaded in the first step.

    +*************** -

    Add the species list to make the tree

    +### Get a species list +Grab the taxon list from my github account [here](https://raw.github.com/sckott/sckott.github.com/master/public/img/species.txt) -

    Then hit "add from file", and you got a "tree"

    +### Go to the site +Go to the Common Tree site [here](http://www.ncbi.nlm.nih.gov/Taxonomy/CommonTree/wwwcmt.cgi) -

    image

    +### Choose file +Hit the "choose file" button, then select the `species.txt` file you downloaded in the first step. -

    Download

    +### Add the species list to make the tree +Then hit "add from file", and you got a "tree" -

    You can download the tree in a variety of formats, including a .phy file

    +![image](https://raw.github.com/sckott/sckott.github.com/master/public/img/ncbi.png) -

    image

    +## Download +You can download the tree in a variety of formats, including a .phy file -

    Plot the tree on your machine

    +![image](https://raw.github.com/sckott/sckott.github.com/master/public/img/ncbi2.png) -

    Make a tree, in R for me

    +## Plot the tree on your machine +Make a tree, in R for me -
    # install.packages('ape') # install if you don't have ape
    -library(ape)
     
-# Read the tree in. You get the tree back with a lot of newlines (\n) -
    -# can easily take these out with a good text editor.
    -tree <- read.tree(text = "(Lampetra:4,((((((Umbra:4,((Lota:4,Microgadus:4)Gadiformes:4,((Culaea:4,Apeltes:4,Pungitius:4,Gasterosteus:4)Gasterosteidae:4,(Morone:4,(Ambloplites:4,Micropterus:4,Lepomis:4)Centrarchidae:4,(Sander:4,Perca:4)Percidae:4)Percoidei:4,Cottus:4)Percomorpha:4)Holacanthopterygii:4)Neognathi:4,(((Prosopium:4,Coregonus:4)Coregoninae:4,(Salvelinus:4,Salmo:4,Oncorhynchus:4)Salmoninae:4)Salmonidae:4,Osmerus:4)Protacanthopterygii:4)Euteleostei:4,(Alosa:4,(Ameiurus:4,(Catostomus:4,(Semotilus:4,Rhinichthys:4,Margariscus:4,Couesius:4,Pimephales:4,Luxilus:4,Notemigonus:4,Notropis:4,Carassius:4)Cyprinidae:4)Cypriniformes:4)Otophysi:4)Otocephala:4)Clupeocephala:4,Anguilla:4)Elopocephala:4,Acipenser:4)Actinopteri:4,Scyliorhinus:4)Gnathostomata:4)Vertebrata:4;")
    +{% highlight r %}
    +# install.packages('ape') # install if you don't have ape
    +library(ape)
     
    -# stretch the branches so tips line up
    -tree2 <- compute.brlen(tree, method = "Grafen")
+# Read the tree in. You get the tree back with a lot of newlines (\n) -
    +# can easily take these out with a good text editor.
    +tree <- read.tree(text = "(Lampetra:4,((((((Umbra:4,((Lota:4,Microgadus:4)Gadiformes:4,((Culaea:4,Apeltes:4,Pungitius:4,Gasterosteus:4)Gasterosteidae:4,(Morone:4,(Ambloplites:4,Micropterus:4,Lepomis:4)Centrarchidae:4,(Sander:4,Perca:4)Percidae:4)Percoidei:4,Cottus:4)Percomorpha:4)Holacanthopterygii:4)Neognathi:4,(((Prosopium:4,Coregonus:4)Coregoninae:4,(Salvelinus:4,Salmo:4,Oncorhynchus:4)Salmoninae:4)Salmonidae:4,Osmerus:4)Protacanthopterygii:4)Euteleostei:4,(Alosa:4,(Ameiurus:4,(Catostomus:4,(Semotilus:4,Rhinichthys:4,Margariscus:4,Couesius:4,Pimephales:4,Luxilus:4,Notemigonus:4,Notropis:4,Carassius:4)Cyprinidae:4)Cypriniformes:4)Otophysi:4)Otocephala:4)Clupeocephala:4,Anguilla:4)Elopocephala:4,Acipenser:4)Actinopteri:4,Scyliorhinus:4)Gnathostomata:4)Vertebrata:4;")
     
    -# Plot the tree
    -plot(tree2, no.margin = TRUE, cex = 0.7)
    +# stretch the branches so tips line up +tree2 <- compute.brlen(tree, method = "Grafen") -

    w00p, there it is...

    +# Plot the tree +plot(tree2, no.margin = TRUE, cex = 0.7) +{% endhighlight %} -

    image4

    -
    +### w00p, there it is... -

    And the answer is NO to the question: Is there an API for Common Tree?

    +![image4](https://raw.github.com/sckott/sckott.github.com/master/public/img/ncbi_tree.png) -
    +*************** -

    Get the .Rmd file used to create this post at my github account - or .md file.

    +### And the answer is _NO_ to the question: Is there an API for Common Tree? -

    Written in Markdown, with help from knitr.

    - -
    - -
    -

    - - testing ifttt recipe, ignore - -

    +*************** - +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2013-02-14-common-tree.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2013-02-14-common-tree.md). -

    testing ifttt recipe

    +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).
    diff --git a/_site/page24/index.html b/_site/page24/index.html index f1d40aa96d..07d8e85211 100644 --- a/_site/page24/index.html +++ b/_site/page24/index.html @@ -59,6 +59,19 @@

    Recology

      +
    +

    + + testing ifttt recipe, ignore + +

    + + + + testing ifttt recipe + +
    +

    @@ -68,37 +81,41 @@

    -

    Dealing with API tokens in R

+ ### Dealing with API tokens in R
+
+In [my previous post](http://sckott.github.io/2013/01/tnrs-use-case/) I showed an example of calling the Phylotastic taxonomic name resolution API `Taxosaurus` [here](http://api.phylotastic.org/tnrs). When you query their API they give you a token which you use later to retrieve the result (see examples on their page above). However, you don't know when the query will be done, so how do we know when to send the query to retrieve the data?
+
+***************
-

In my previous post I showed an example of calling the Phylotastic taxonomic name resolution API Taxosaurus here. When you query their API they give you a token which you use later to retrieve the result (see examples on their page above). However, you don't know when the query will be done, so how do we know when to send the query to retrieve the data?

    +As the time this takes depends on how big the query is and other things, we don't know when we can get the result. I struggled with this for a bit, but then settled on using a while loop. -
    +*************** -

    As the time this takes depends on how big the query is and other things, we don't know when we can get the result. I struggled with this for a bit, but then settled on using a while loop.

    +So what does this look like? Basically we just keep sending the request for data until we get it. -
    -

    So what does this look like? Basically we just keep sending the request for data until we get it.

+{% highlight r %}
+iter <- 0  # make an iterator so each time we call
+output <- list()  # make an empty list to put data into
+timeout <- "wait"
+while (timeout == "wait") {
+    iter <- iter + 1  # increase the iterator each time
+    temp <- fromJSON(getURL(retrieve))  # send the request and parse the JSON
+    if (grepl("is still being processed", temp["message"]) == TRUE) {
+        timeout <- "wait"
+    } else {
+        output[[iter]] <- temp  # put result from query in the list
+        timeout <- "done"  # we got the result so timeout is now done, making the while loop stop
+    }
+}
+{% endhighlight %}
-
    iter <- 0  # make an iterator so each time we call
    -output <- list()  # make an empty list to put data into
    -timeout <- "wait"
    -while (timeout == "wait") {
    -    iter <- iter + 1  # increase the iterator each time
    -    temp <- fromJSON(getURL(retrieve))  # send the request and parse the JSON
    -    if (grepl("is still being processed", temp["message"]) == TRUE) {
    -        timeout <- "wait"
    -    } else {
    -        output[[iter]] <- temp  # put result from query in the list
    -        timeout <- "done"  # we got the result so timeout is now done, making the while loop stop
    -    }
    -}
    -
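One practical note on the loop above (an editorial aside): it assumes `fromJSON` and `getURL` are already available and that `retrieve` holds the token URL returned when the query was submitted. A hedged sketch of those missing pieces:

{% highlight r %}
# Hedged sketch of the pieces the while loop assumes are in place.
library(RCurl)     # provides getURL()
library(RJSONIO)   # provides fromJSON(); the rjson package would also work
# 'retrieve' is the URL Taxosaurus hands back for fetching the result,
# e.g. (hypothetical token):
retrieve <- "http://taxosaurus.org/retrieve/<token>"
# A Sys.sleep() inside the loop would also be polite to the web service.
{% endhighlight %}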
    +*************** -

    Get the .Rmd file used to create this post at my github account - or .md file.

    +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2013-01-26-api-token.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2013-01-26-api-token.md). -

    Written in Markdown, with help from knitr.

    +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).
    @@ -111,61 +128,84 @@

    -

    taxize use case: Resolving species names when you have a lot of them

    + ### __taxize use case: Resolving species names when you have a lot of them__ + +Species names can be a pain in the ass, especially if you are an ecologist. We ecologists aren't trained in taxonomy, yet we often end up with huge species lists. Of course we want to correct any spelling errors in the names, and get the newest names for our species, resolve any synonyms, etc. + +We are building tools into our R package [`taxize`](http://ropensci.github.com/taxize_/), that will let you check your species names to make sure they are correct. + +An important use case is when you have a lot of species. Someone wrote to us recently, saying that they had thousands of species, and they wanted to know how to check their species names efficiently in R. + +Below is an example of how to do this. + +*************** + +#### Install taxize -

    Species names can be a pain in the ass, especially if you are an ecologist. We ecologists aren't trained in taxonomy, yet we often end up with huge species lists. Of course we want to correct any spelling errors in the names, and get the newest names for our species, resolve any synonyms, etc.

    +{% highlight r %} +# install_github('taxize_', 'ropensci') # install the GitHub version, not +# the CRAN version, uncomment if you don't have it installed +library(taxize) +{% endhighlight %} -

    We are building tools into our R package taxize, that will let you check your species names to make sure they are correct.

    -

    An important use case is when you have a lot of species. Someone wrote to us recently, saying that they had thousands of species, and they wanted to know how to check their species names efficiently in R.

    +*************** -

    Below is an example of how to do this.

    +#### Get some species, in this case all species in the Scrophulariaceae family from theplantlist.org -
    +{% highlight r %} +tpl_get(dir_ = "~/foo2", family = "Scrophulariaceae") +{% endhighlight %} -

    Install taxize

    -
    # install_github('taxize_', 'ropensci') # install the GitHub version, not
    -# the CRAN version, uncomment if you don't have it installed
    -library(taxize)
    -
    +{% highlight text %} +## Reading and writing csv files to ~/foo2... +{% endhighlight %} -

    Get some species, in this case all species in the Scrophulariaceae family from theplantlist.org

    -
    tpl_get(dir_ = "~/foo2", family = "Scrophulariaceae")
    +{% highlight r %} +dat <- read.csv("~/foo2/Scrophulariaceae.csv") +{% endhighlight %} -
    ## Reading and writing csv files to ~/foo2...
    -
    dat <- read.csv("~/foo2/Scrophulariaceae.csv")
    +*************** -
    +#### Lets grab the species and concatenate to genus_species -

Let's grab the species and concatenate to genus_species

    +{% highlight r %} +species <- as.character(ddply(dat[, c("Genus", "Species")], .(), transform, + gen_sp = as.factor(paste(Genus, Species, sep = " ")))[, 4]) +{% endhighlight %} -
    species <- as.character(ddply(dat[, c("Genus", "Species")], .(), transform, 
    -    gen_sp = as.factor(paste(Genus, Species, sep = " ")))[, 4])
    -
    +*************** -

    It's better to do many smaller calls to a web API instead of few big ones to be nice to the database maintainers.

    +#### It's better to do many smaller calls to a web API instead of few big ones to be nice to the database maintainers. -
## Define function to split up your species list into usable chunks
    -slice <- function(input, by = 2) {
    -    starts <- seq(1, length(input), by)
    -    tt <- lapply(starts, function(y) input[y:(y + (by - 1))])
    -    llply(tt, function(x) x[!is.na(x)])
    -}
    -species_split <- slice(species, by = 100)
+{% highlight r %}
+## Define function to split up your species list into usable chunks
+slice <- function(input, by = 2) {
+    starts <- seq(1, length(input), by)
+    tt <- lapply(starts, function(y) input[y:(y + (by - 1))])
+    llply(tt, function(x) x[!is.na(x)])
+}
+species_split <- slice(species, by = 100)
+{% endhighlight %}
-
    -

    Query for your large species list with pauses between calls, with 3 seconds in between calls to not hit the web service too hard. Using POST method here instead of GET - required when you have a lot of species.

    +*************** -
    tnrs_safe <- failwith(NULL, tnrs)  # in case some calls fail, will continue
    -out <- llply(species_split, function(x) tnrs_safe(x, getpost = "POST", sleep = 3))
    +#### Query for your large species list with pauses between calls, with 3 seconds in between calls to not hit the web service too hard. Using POST method here instead of GET - required when you have a lot of species. -
    Calling http://taxosaurus.org/retrieve/90fcd9ae425ad7c6103b06dd9fd78ae2
    +{% highlight r %}
    +tnrs_safe <- failwith(NULL, tnrs)  # in case some calls fail, will continue
    +out <- llply(species_split, function(x) tnrs_safe(x, getpost = "POST", sleep = 3))
    +{% endhighlight %}
    +
    +
    +{% highlight text %}
    +Calling http://taxosaurus.org/retrieve/90fcd9ae425ad7c6103b06dd9fd78ae2
     Calling http://taxosaurus.org/retrieve/223f73b83fcddcb8b6187966963660a8
     Calling http://taxosaurus.org/retrieve/72bacdbb8938316e321d4c709c8cdd09
     Calling http://taxosaurus.org/retrieve/979ce9cc4dec376710f61de162e1294e
    @@ -175,12 +215,21 @@ 

    Query for your large species list with pauses between calls, with 3 seconds Calling http://taxosaurus.org/retrieve/215ccdcf2b00362278bf19d1942e1395 Calling http://taxosaurus.org/retrieve/9d43c0b99b4dfb5ea1b435adab17b980 Calling http://taxosaurus.org/retrieve/42e166f8e43f1fb349e36459cd5938b3 -Calling http://taxosaurus.org/retrieve/2c42e4b5227c5464f9bfeeafcdf0651d

    +Calling http://taxosaurus.org/retrieve/2c42e4b5227c5464f9bfeeafcdf0651d +{% endhighlight %} + + + +{% highlight r %} -
    # Looks like we got some data back for each element of our species list
    -lapply(out, head)[1:2]  # just look at the first two
    +# Looks like we got some data back for each element of our species list +lapply(out, head)[1:2] # just look at the first two +{% endhighlight %} -
    [[1]]
    +
    +
    +{% highlight text %}
    +[[1]]
                      submittedName                 acceptedName    sourceId
     1        Aptosimum welwitschii                              iPlant_TNRS
     2        Anticharis ebracteata        Anticharis ebracteata iPlant_TNRS
    @@ -191,7 +240,7 @@ 

    Query for your large species list with pauses between calls, with 3 seconds score matchedName annotations 1 1 Aptosimum welwitschii 2 1 Anticharis ebracteata Schinz -3 1 Aptosimum lineare Marloth & Engl. +3 1 Aptosimum lineare Marloth & Engl. 4 1 Antherothamnus pearsonii N.E. Br. 5 1 Barthlottia madagascariensis Eb. Fisch. 6 1 Agathelpis mucronata @@ -214,8 +263,8 @@

    Query for your large species list with pauses between calls, with 3 seconds score matchedName annotations 1 0.9 Buddleja pichinchensis Kunth 2 1.0 Buddleja soratae Kraenzl. -3 1.0 Buddleja euryphylla Standl. & Steyerm. -4 1.0 Buddleja incana Ruiz & Pav. +3 1.0 Buddleja euryphylla Standl. & Steyerm. +4 1.0 Buddleja incana Ruiz & Pav. 5 1.0 Buddleja incana none 6 1.0 Buddleja nana Diels uri @@ -224,13 +273,22 @@

    Query for your large species list with pauses between calls, with 3 seconds 3 http://www.tropicos.org/Name/19000790 4 http://www.tropicos.org/Name/19000596 5 http://www.ncbi.nlm.nih.gov/taxonomy/405077 -6 http://www.tropicos.org/Name/19001133

    +6 http://www.tropicos.org/Name/19001133 +{% endhighlight %} + + -
    # Now we can put them back together as so into one data.frame if you like
    -outdf <- ldply(out)
    -head(outdf)
    +{% highlight r %} -
                     submittedName                 acceptedName    sourceId
    +# Now we can put them back together as so into one data.frame if you like
    +outdf <- ldply(out)
    +head(outdf)
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +                 submittedName                 acceptedName    sourceId
     1        Aptosimum welwitschii                              iPlant_TNRS
     2        Anticharis ebracteata        Anticharis ebracteata iPlant_TNRS
     3            Aptosimum lineare            Aptosimum lineare iPlant_TNRS
    @@ -240,7 +298,7 @@ 

    Query for your large species list with pauses between calls, with 3 seconds score matchedName annotations 1 1 Aptosimum welwitschii 2 1 Anticharis ebracteata Schinz -3 1 Aptosimum lineare Marloth & Engl. +3 1 Aptosimum lineare Marloth & Engl. 4 1 Antherothamnus pearsonii N.E. Br. 5 1 Barthlottia madagascariensis Eb. Fisch. 6 1 Agathelpis mucronata @@ -250,28 +308,42 @@

    Query for your large species list with pauses between calls, with 3 seconds 3 http://www.tropicos.org/Name/29202525 4 http://www.tropicos.org/Name/29202728 5 http://www.tropicos.org/Name/50089700 -6

    +6 +{% endhighlight %} + -

Note that there are multiple names for some species because data sources have different names for the same species (resulting in more than one row in the data.frame 'outdf' for a species). We are leaving this up to the user to decide which to use. For example, for the species Buddleja montana there are two names in the output

    +Note that there are multiple names for some species because data sources have different names for the same species (resulting in more than one row in the data.frame 'outdf' for a species). We are leaving this up to the user to decide which to use. For example, for the species _Buddleja montana_ there are two names for in the output -
    data <- ddply(outdf, .(submittedName), summarize, length(submittedName))
    -outdf[outdf$submittedName %in% as.character(data[data$..1 > 1, ][6, "submittedName"]), 
    -    ]
    +{% highlight r %} +data <- ddply(outdf, .(submittedName), summarize, length(submittedName)) +outdf[outdf$submittedName %in% as.character(data[data$..1 > 1, ][6, "submittedName"]), + ] +{% endhighlight %} -
           submittedName     acceptedName    sourceId score      matchedName
    +
    +
    +{% highlight text %}
    +       submittedName     acceptedName    sourceId score      matchedName
     123 Buddleja montana Buddleja montana iPlant_TNRS     1 Buddleja montana
     124 Buddleja montana          Montana        NCBI     1 Buddleja montana
              annotations                                         uri
     123 Britton ex Rusby       http://www.tropicos.org/Name/19000601
    -124             none http://www.ncbi.nlm.nih.gov/taxonomy/441235
    +124 none http://www.ncbi.nlm.nih.gov/taxonomy/441235 +{% endhighlight %} + + +The source iPlant matched the name, but NCBI actually gave back a genus of cricket (follow the link under the column uri for _Montana_). If you look at the page for _Buddleja_ on NCBI [here](http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=26473) there is no _Buddleja montana_ at all. -

    The source iPlant matched the name, but NCBI actually gave back a genus of cricket (follow the link under the column uri for Montana). If you look at the page for Buddleja on NCBI here there is no Buddleja montana at all.
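One hypothetical way to handle such spurious matches (not from the original post) is to keep only the rows from the source you trust for plant names, using the sourceId column shown in the output above:

{% highlight r %}
# Keep only the iPlant_TNRS rows of outdf, dropping sources like NCBI
# that matched a non-plant taxon here.
plants_only <- outdf[outdf$sourceId == "iPlant_TNRS", ]
head(plants_only)
{% endhighlight %}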

+Another thing we could do is look at the score that is returned. Let's look at those that are less than 1 (i.e., not exact matches)
-

Another thing we could do is look at the score that is returned. Let's look at those that are less than 1 (i.e., not exact matches)

+{% highlight r %}
+outdf[outdf$score < 1, ]
+{% endhighlight %}
-
    outdf[outdf$score < 1, ]
    -
                            submittedName           acceptedName    sourceId
    +
    +{% highlight text %}
    +                        submittedName           acceptedName    sourceId
     94   Buddleja pichinchensis x bullata Buddleja pichinchensis iPlant_TNRS
     340                Diascia ellaphieae                        iPlant_TNRS
     495              Eremophila decipiens                        iPlant_TNRS
    @@ -297,89 +369,22 @@ 

Query for your large species list with pauses between calls, with 3 seconds
808
1051 http://www.tropicos.org/Name/40023766
1081 http://www.tropicos.org/Name/40023766
-1097 http://www.tropicos.org/Name/40023766

    - -

As we got this species list from theplantlist.org, there aren't that many mistakes, but if it was my species list you know there would be many :)

    - -
    - -

    That's it. Try it out and let us know if you have any questions at info@ropensci.org, or ask questions/report problems at GitHub.

    - -
    - -

    Get the .Rmd file used to create this post at my github account - or .md file.

    - -

    Written in Markdown, with help from knitr.

    - -
    - -
    -

    - - Open Science Challenge - -

    - - - -

    center

    - -
    - -

    Open Science

    - -

Science is becoming more open in many areas: publishing, data sharing, lab notebooks, and software. There are many benefits to open science. For example, sharing research data alongside your publications leads to an increased citation rate (Piwowar et al. 2007). In addition, data is becoming easier to share and reuse thanks to efforts like FigShare and Dryad.

    - -

    If you don't understand the problem we are currently facing due to lack of open science, watch this video:

    - - - -
    - -

    I just want Data

    - -

    Another way to look at this challenge is to think about how you can get data more easily. Right now you probably go to a website that has an interface to a database. You do a search, and then download a .csv file perhaps. Then you open it in Excel, and do some pivot tables to get the data in the right format. Only then will you bring the data in to R.

    - -

The advantage of using our packages is that they let you do that whole data collection step in a few lines of code, so all of the steps in the paragraph above collapse into a few lines in one R file. Why does this matter? You can more easily reproduce your own work months later, after that summer vacation, and others can reproduce your research more easily too.
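As a rough sketch of what "a few lines of code" can look like (using the rgbif package as one example; the calls mirror ones used elsewhere on this site, so treat the details as illustrative):

{% highlight r %}
# Hypothetical example: fetch species occurrence records straight into R,
# skipping the manual download-and-Excel steps described above.
library(rgbif)
dat <- occurrencelist("Aix sponsa", coordinatestatus = TRUE, maxresults = 50)
gbifmap(dat)  # quick map of the records
{% endhighlight %}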

    - -
    - -

    The challenge

    - -

We (ropensci) have just kicked off the rOpenSci Open Science Challenge. If you aren't familiar with rOpenSci, it is a software collective connecting scientists to open science data on the web. Since R is the most popular programming language for life scientists, it made sense to do this in R (instead of, say, Python).

    - -
    - -

    What is the challenge about?

    - -

At rOpenSci, we create R software to make getting open source text from publications and open source data easy. An important result of this is that we are facilitating open science. Why? Because R is an open source programming language, and all of our software is open source. This challenge asks you to propose a project using one or more of our packages - or perhaps you want to propose a new dataset to connect to R. The rOpenSci core developer team will help you with any problems using our packages, and attempt to modify packages according to feedback from participants. Do you use one or more of our R packages? If you do, great. If not, check out our packages here.

    - -
    - -

    How to apply

    - -

    Just send an email to info@ropensci.org.

    - -
+1097 http://www.tropicos.org/Name/40023766
+{% endhighlight %}
-

    The deadline

    -

    January 31, 2013

+As we got this species list from [theplantlist.org](http://www.theplantlist.org/), there aren't that many mistakes, but if it was my species list you know there would be many :)
-
    -

    Get the .Rmd file used to create this post at my github account - or .md file.

    +*************** -

    Written in Markdown, with help from knitr, and knitcitations from Carl Boettiger.

    +### That's it. Try it out and let us know if you have any questions at [info@ropensci.org](mailto:info@ropensci.org), or [ask questions/report problems at GitHub](https://github.com/ropensci/taxize_/issues). -
    +*************** -

    References

    +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/2013-01-25-tnrs-use-case.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2013-01-25-tnrs-use-case.md). -

Piwowar HA, Day RS, Fridsma DB and Ioannidis J (2007).
-“Sharing Detailed Research Data is Associated With Increased Citation Rate.”
-Plos One, 2.
-http://dx.doi.org/10.1371/journal.pone.0000308.

    +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).
diff --git a/_site/page25/index.html b/_site/page25/index.html
index 6df7d22bc4..75f71b54dd 100644
--- a/_site/page25/index.html
+++ b/_site/page25/index.html
@@ -59,6 +59,76 @@

    Recology

      +
    +

    + + Open Science Challenge + +

+
+
+
+ ![center](https://raw.github.com/sckott/sckott.github.com/master/public/img/ropensci_challenge.png)
+
+***************
+
+### __Open Science__
+
+Science is becoming more open in many areas: publishing, data sharing, lab notebooks, and software. There are many benefits to open science. For example, sharing research data alongside your publications leads to an increased citation rate (Piwowar _et al._ 2007). In addition, data is becoming easier to share and reuse thanks to efforts like [FigShare](http://figshare.com/) and [Dryad](http://datadryad.org/).
+
+If you don't understand the problem we are currently facing due to lack of open science, watch this video:
+
+
+
+
+***************
+
+### __I just want Data__
+
+Another way to look at this challenge is to think about how you can get data more easily. Right now you probably go to a website that has an interface to a database. You do a search, and then download a .csv file perhaps. Then you open it in Excel, and do some pivot tables to get the data in the right format. Only then will you bring the data in to R.
+
+The advantage of using our packages is that they allow you to do that data collection part in a few lines of code. Therefore, you can easily do all those steps in the above paragraph using a few lines of code in one R file. Why does this matter? You can more easily reproduce your own work months later after that summer vacation. In addition, others can reproduce your research more easily.
+
+***************
+
+### __The challenge__
+
+We ([ropensci](http://ropensci.org/)) have just kicked off the [rOpenSci Open Science Challenge](http://ropensci.org/open-science-challenge/). If you aren't familiar with rOpenSci, it is a software collective connecting scientists to open science data on the web. Since R is the most popular programming language for life scientists, it made sense to do this in R (instead of, say, Python).
+
+***************
+
+### __What is the challenge about?__
+
+At rOpenSci, we create R software to make getting open source text from publications and open source data easy. An important result of this is that we are facilitating open science. Why? Because R is an open source programming language, and all of our software is open source. This challenge asks you to propose a project using one or more of our packages - or perhaps you want to propose a new dataset to connect to R. The rOpenSci core developer team will help you with any problems using our packages, and attempt to modify packages according to feedback from participants. Do you use one or more of our R packages? If you do, great. If not, check out our packages [here](http://ropensci.org/packages/index.html).
+
+***************
+
+### __How to apply__
+
+Just send an email to [info@ropensci.org](mailto:info@ropensci.org?subject=rOpenSci Open Science Challenge).
+
+***************
+
+### __The deadline__
+
+January 31, 2013
+
+***************
+
+#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2013-01-08-open-science-challenge.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2013-01-08-open-science-challenge.md).
+
+#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and [knitcitations](https://github.com/cboettig/knitcitations) from [Carl Boettiger](http://www.carlboettiger.info/).
+
+***************
+
+#### References
+

Piwowar HA, Day RS, Fridsma DB and Ioannidis J (2007).
+“Sharing Detailed Research Data is Associated With Increased Citation Rate.”
+Plos One, 2.
+http://dx.doi.org/10.1371/journal.pone.0000308.
+
+

    +

    @@ -68,36 +138,47 @@

    -

    The Global Invasive Species Database (GISD) (see their website for more info here) has data on the invasiveness status of many species. From taxize you can now query the GISD database.

+ The Global Invasive Species Database (GISD) (see their website for more info [here](http://www.issg.org/database/welcome/)) has data on the invasiveness status of many species. From `taxize` you can now query the GISD database.
+
+Introducing the function `gisd_isinvasive`. This function was contributed to `taxize` by [Ignasi Bartomeus](http://www.bartomeus.cat/es/ignasi/), a postdoc at the Swedish University of Agricultural Sciences.
+
+There are two possible outputs from using `gisd_isinvasive`: "Invasive" or "Not in GISD". If you use `simplify=TRUE` in the function you get "Invasive" or "Not in GISD", but if you use `simplify=FALSE` you get a verbose description of the invasive species instead of just "Invasive" (and you still just get "Not in GISD").
+
+***************

Introducing the function gisd_isinvasive. This function was contributed to taxize by Ignasi Bartomeus, a postdoc at the Swedish University of Agricultural Sciences.

    +![center](http://sckott.github.io/public/img/gisd_small.png) -

There are two possible outputs from using gisd_isinvasive: "Invasive" or "Not in GISD". If you use simplify=TRUE in the function you get "Invasive" or "Not in GISD", but if you use simplify=FALSE you get a verbose description of the invasive species instead of just "Invasive" (and you still just get "Not in GISD").

    +*************** -
    +### Install taxize from GitHub -

    center

+{% highlight r %}
+# install_github('taxize_', 'ropensci') # install if you don't already
+# have the GitHub version
+library(taxize)
+{% endhighlight %}
-
    -

    Install taxize from GitHub

    +*************** -
    # install_github('taxize_', 'ropensci') # install if you don't already
    -# have the GitHub version
    -library(taxize)
    +### Make a vector of species -
+{% highlight r %}
+sp <- c("Carpobrotus edulis", "Rosmarinus officinalis", "Nasua nasua", "Martes melampus", 
+    "Centaurea solstitialis")
+{% endhighlight %}
-

    Make a vector of species

    -
    sp <- c("Carpobrotus edulis", "Rosmarinus officinalis", "Nasua nasua", "Martes melampus", 
    -    "Centaurea solstitialis")
    +### Using the function `gisd_isinvasive` you can query the GISD database for the invasiveness status of your species, at least according to GISD. Calling `gisd_isinvasive` with the second parameter set to default `simplify=FALSE`, you get verbose output, with details on the species. -

    Using the function gisd_isinvasive you can query the GISD database for the invasiveness status of your species, at least according to GISD. Calling gisd_isinvasive with the second parameter set to default simplify=FALSE, you get verbose output, with details on the species.

+{% highlight r %}
+gisd_isinvasive(sp)
+{% endhighlight %}
-
    gisd_isinvasive(sp)
    -
    Checking species 1
    +
    +{% highlight text %}
    +Checking species 1
     Checking species 2
     Checking species 3
     Checking species 4
    @@ -115,13 +196,20 @@ 

    Using the function gisd_isinvasive you can query the GISD datab 2 Not in GISD 3 You searched for invasive species named Nasua nasua:1. Nasua nasua (mammal)            Interim profile, incomplete informationCommon Names: Achuni, Coatí, South American Coati, Tejón 4 You searched for invasive species named Martes melampus:1. Martes melampus (mammal)            Interim profile, incomplete informationCommon Names: Japanese Marten, Tsushima Island Marten -5 You searched for invasive species named Centaurea solstitialis:     1. Centaurea solstitialis (herb)       Centaurea solstitialis is a winter annual that can form dense impenetrable stands that displace desirable vegetation in natural areas, rangelands, and other places. It is best adapted to open grasslands with deep, well-drained soils and an annual precipitation range of 25 to 150cm per year. It is intolerant of shade. Although populations can occur at elevations as high as 2,400 m, most large infestations are found below 1,500 m. Human activities are the primary mechanisms for the long distance movement of C. solstitialis seed. The short, stiff, pappus bristles are covered with barbs that readily adhere to clothing, hair, and fur. The movement of contaminated hay and uncertified seed are also important long distance transportation mechanisms. Wind disperses seeds over short distances.\r\nCommon Names: geeldissel, golden star thistle, sonnwend-Flockenblume, St. Barnaby's thistle, yellow centaury, yellow cockspur, yellow star thistle\r\nSynonyms: Leucantha solstitialis (L.) A.& D. Löve

+5 You searched for invasive species named Centaurea solstitialis:     1. Centaurea solstitialis (herb)       Centaurea solstitialis is a winter annual that can form dense impenetrable stands that displace desirable vegetation in natural areas, rangelands, and other places. It is best adapted to open grasslands with deep, well-drained soils and an annual precipitation range of 25 to 150cm per year. It is intolerant of shade. Although populations can occur at elevations as high as 2,400 m, most large infestations are found below 1,500 m. Human activities are the primary mechanisms for the long distance movement of C. solstitialis seed. The short, stiff, pappus bristles are covered with barbs that readily adhere to clothing, hair, and fur. The movement of contaminated hay and uncertified seed are also important long distance transportation mechanisms. Wind disperses seeds over short distances.\r\nCommon Names: geeldissel, golden star thistle, sonnwend-Flockenblume, St. Barnaby's thistle, yellow centaury, yellow cockspur, yellow star thistle\r\nSynonyms: Leucantha solstitialis (L.) A.& D. Löve
+{% endhighlight %}
+
+
+### Simpler output, just the invasive status.
+
+{% highlight r %}
+gisd_isinvasive(sp, simplify = TRUE)
+{% endhighlight %}
-

    Simpler output, just the invasive status.

    -
    gisd_isinvasive(sp, simplify = TRUE)
    -
    Checking species 1
    +{% highlight text %}
    +Checking species 1
     Checking species 2
     Checking species 3
     Checking species 4
    @@ -133,13 +221,15 @@ 

    Simpler output, just the invasive status.

2 Rosmarinus officinalis Not in GISD
3            Nasua nasua    Invasive
4        Martes melampus    Invasive
-5 Centaurea solstitialis    Invasive
+5 Centaurea solstitialis    Invasive
+{% endhighlight %}
-
    -

    Get the .Rmd file used to create this post at my github account - or .md file.

    +********* -

    Written in Markdown, with help from knitr.

+#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-12-13-is-invasive.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-12-13-is-invasive.md).
+
+#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).
    @@ -152,471 +242,95 @@

    -

    RStudio has a new product called Shiny that, quoting from their website, "makes it super simple for R users like you to turn analyses into interactive web applications that anyone can use". See here for more information.

    - -

A Shiny app basically consists of two files: a ui.r file and a server.r file. The ui.r file, as it says, provides the user interface, and the server.r file provides the server logic.
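For orientation, here is a minimal, hypothetical two-file sketch (a toy app, not the rgbif app described below) using the same shiny functions that app relies on:

{% highlight r %}
# --- ui.r: the user interface ---
library(shiny)
shinyUI(pageWithSidebar(
    headerPanel("toy app"),
    sidebarPanel(selectInput("n", "Sample size:", c(10, 100, 1000))),
    mainPanel(plotOutput("myplot"))
))

# --- server.r: the server logic ---
library(shiny)
shinyServer(function(input, output) {
    output$myplot <- reactivePlot(function() {
        hist(rnorm(as.numeric(input$n)))  # redraws when the input changes
    })
})
{% endhighlight %}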

    - -

    Below is what it looks like in the wild (on a browser).

    - -

    center

    + RStudio has a new product called `Shiny` that, quoting from their website, "makes it super simple for R users like you to turn analyses into interactive web applications that anyone can use". [See here](http://www.rstudio.com/shiny/) for more information. -

    It was pretty easy (for Ted Hart of rOpenSci) to build this app to demonstrate output from the ropensci rgbif package.

+A `Shiny` app basically consists of two files: a `ui.r` file and a `server.r` file. The `ui.r` file, as it says, provides the user interface, and the `server.r` file provides the server logic.
-
    +Below is what it looks like in the wild (on a browser). -

    You may need to install packages first.

    +![center](http://sckott.github.io/public/img/shiny_ss.png) -
    install.packages(c("shiny", "ggplot2", "plyr", "rgbif"))
    +It was pretty easy (for [Ted Hart of rOpenSci](http://emhart.github.com/)) to build this app to demonstrate output from the [`ropensci rgbif` package](http://cran.r-project.org/web/packages/rgbif/index.html). -
    +*************** -

    We tried to build in making real time API calls to GBIF's servers, but the calls took too long for web speed. So we prepare the data first, and then serve it up from saved data in a .rda file. Let's first prepare the data. --Well, this is what we do on the app itself, but see the next code block for

    +### You may need to install packages first. -
    library(rgbif)
    -splist <- c("Accipiter erythronemius", "Junco hyemalis", "Aix sponsa", "Haliaeetus leucocephalus", 
    -    "Corvus corone", "Threskiornis molucca", "Merops malimbicus")
    -out <- llply(splist, function(x) occurrencelist(x, coordinatestatus = T, maxresults = 100))
    -names(out) <- splist  # name each data.frame with the species names
    -setwd("~/ShinyApps/rgbif2")  # set directory
    -save(out, file = "speciesdata.rda")  # save the list of data.frames into an .rda file to serve up
    +{% highlight r %} +install.packages(c("shiny", "ggplot2", "plyr", "rgbif")) +{% endhighlight %} -
    -

    Here's the server logic

    +*************** -
    library(shiny)
    -library(plyr)
    -library(ggplot2)
    -library(rgbif)
    +### We tried to build in making real time API calls to GBIF's servers, but the calls took too long for web speed.  So we prepare the data first, and then serve it up from saved data in a `.rda` file. Let's first prepare the data. --Well, this is what we do on the app itself, but see the next code block for 
     
    -## Set up server output
    -shinyServer(function(input, output) {
    -    load("speciesdata.rda")
    -    # define function for server plot output
    -    output$gbifplot <- reactivePlot(function() {
    -        species <- input$spec
    -        df <- out[names(out) %in% species]
    -        print(gbifmap(df))
    -    })
    -    output$cbt <- reactiveText(function() {
    -    })
    -})
    +{% highlight r %} +library(rgbif) +splist <- c("Accipiter erythronemius", "Junco hyemalis", "Aix sponsa", "Haliaeetus leucocephalus", + "Corvus corone", "Threskiornis molucca", "Merops malimbicus") +out <- llply(splist, function(x) occurrencelist(x, coordinatestatus = T, maxresults = 100)) +names(out) <- splist # name each data.frame with the species names +setwd("~/ShinyApps/rgbif2") # set directory +save(out, file = "speciesdata.rda") # save the list of data.frames into an .rda file to serve up +{% endhighlight %} -
    -

    The user interface

    - -
    library(shiny)
    -
    -# Define UI for application that plots random distributions
    -shinyUI(pageWithSidebar(headerPanel("rgbif example"), sidebarPanel(checkboxGroupInput("spec", 
    -    "Species to map:", c(`Sharp shinned hawk (Accipiter erythronemius)` = "Accipiter erythronemius", 
    -        `Dark eyed junco (Junco hyemalis)` = "Junco hyemalis", `Wood duck (Aix sponsa)` = "Aix sponsa", 
    -        `Bald eagle (Haliaeetus leucocephalus)` = "Haliaeetus leucocephalus", 
    -        `Carrion crow (Corvus corone)` = "Corvus corone", `Australian White Ibis (Threskiornis molucca)` = "Threskiornis molucca", 
    -        `Rosy Bee-eater (Merops malimbicus)` = "Merops malimbicus"), selected = c("Bald eagle (Haliaeetus leucocephalus)"))), 
    -    mainPanel(h5("A map of your selected species: Please note that GBIF is queried for every selection so loading times vary"), 
    -        plotOutput("gbifplot"))))
    - -
    - -

This should be all you need. To actually serve up the app on the web, request to be part of their beta-test of Shiny server here.

    - -

    Go play with our Shiny app here to see the kind of visualization you can do with the rgbif package.

    - -
    - -

    Get the .Rmd file used to create this post at my github account - or .md file.

    - -

    Written in Markdown, with help from knitr.

    - -
    - -
    -

    - - One R package for all your taxonomic needs - -

    +*************** - +### Here's the server logic -

UPDATE: there were some errors in the tests for taxize, so the binaries aren't available yet. You can install from source though, see below.

    +{% highlight r %} +library(shiny) +library(plyr) +library(ggplot2) +library(rgbif) -

    Getting taxonomic information for the set of species you are studying can be a pain in the ass. You have to manually type, or paste in, your species one-by-one. Or, if you are lucky, there is a web service in which you can upload a list of species. Encyclopedia of Life (EOL) has a service where you can do this here. But is this reproducible? No.

    +## Set up server output +shinyServer(function(input, output) { + load("speciesdata.rda") + # define function for server plot output + output$gbifplot <- reactivePlot(function() { + species <- input$spec + df <- out[names(out) %in% species] + print(gbifmap(df)) + }) + output$cbt <- reactiveText(function() { + }) +}) +{% endhighlight %} -

Getting your taxonomic information for your species can now be done programmatically in R. Do you want to get taxonomic information from ITIS? We got that. Tropicos? We got that too. uBio? No worries, we got that. What about theplantlist.org? Yep, got that. Encyclopedia of Life? Indeed. What about getting sequence data for a taxon? Oh hell yeah, you can get sequences available for a taxon across all genes, or get all records for a taxon for a specific gene.

    -

    Of course this is all possible because these data providers have open APIs so that we can facilitate your computer talking to their database. Fun!

    +*************** -

Why get your taxonomic data programmatically? Because it's 1) faster than by hand in web sites/looking up in books, 2) reproducible, especially if you share your code (damnit!), and 3) you can easily mash up your new taxonomic data to get sequences to build a phylogeny, etc., as in the sketch below.
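For instance, a hypothetical mashup chaining two functions that appear later in this post: check a name against theplantlist.org, then see what sequences NCBI has for that taxon. Treat the species and parameters as illustrative.

{% highlight r %}
# Illustrative only: resolve a name, then list genes with sequences on NCBI.
library(taxize)
tpl_search(taxon = "Quercus kelloggii")
avail <- get_genes_avail(taxon_name = "Quercus kelloggii", seqrange = "1:2000",
    getrelated = FALSE)
head(avail)
{% endhighlight %}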

    +### The user interface -

    I'll give a few examples of using taxize based around use cases, that is, stuff someone might actually do instead of what particular functions do.

    +{% highlight r %} +library(shiny) -
    +# Define UI for application that plots random distributions +shinyUI(pageWithSidebar(headerPanel("rgbif example"), sidebarPanel(checkboxGroupInput("spec", + "Species to map:", c(`Sharp shinned hawk (Accipiter erythronemius)` = "Accipiter erythronemius", + `Dark eyed junco (Junco hyemalis)` = "Junco hyemalis", `Wood duck (Aix sponsa)` = "Aix sponsa", + `Bald eagle (Haliaeetus leucocephalus)` = "Haliaeetus leucocephalus", + `Carrion crow (Corvus corone)` = "Corvus corone", `Australian White Ibis (Threskiornis molucca)` = "Threskiornis molucca", + `Rosy Bee-eater (Merops malimbicus)` = "Merops malimbicus"), selected = c("Bald eagle (Haliaeetus leucocephalus)"))), + mainPanel(h5("A map of your selected species: Please note that GBIF is queried for every selection so loading times vary"), + plotOutput("gbifplot")))) +{% endhighlight %} -

    Install packages. You can get from CRAN or GitHub.

    -
    # install.packages("ritis") # uncomment if not already installed
    -# install_github('taxize_', 'ropensci') # uncomment if not already installed
    -# install.packages("taxize", type="source") # uncomment if not already installed
    -library(ritis)
    -library(taxize)
    +********* -
    +This should be all you need. To actually serve up the app in the web, request to be part of their beta-test of Shiny server on the web [here](https://rstudio.wufoo.com/forms/shiny-server-beta-program/). -

    Attach family names to a list of species.

    +Go play with our Shiny app [here](http://glimmer.rstudio.com/ropensci/rgbif2/) to see the kind of visualization you can do with the `rgbif` package. -

    I often have a list of species that I studied and simply want to get their family names to, for example, make a table for the paper I'm writing.

    +********* -
    # For one species
    -itis_name(query = "Poa annua", get = "family")
    +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-12-10-shiny-r.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-12-10-shiny-r.md). -
    Retrieving data for species ' Poa annua '
    - -
    [1] "Poaceae"
    - -
    # For many species
    -species <- c("Poa annua", "Abies procera", "Helianthus annuus", "Coffea arabica")
    -famnames <- sapply(species, itis_name, get = "family", USE.NAMES = F)
    - -
    Retrieving data for species ' Poa annua '
    - -
    Retrieving data for species ' Abies procera '
    - -
    Retrieving data for species ' Helianthus annuus '
    - -
    Retrieving data for species ' Coffea arabica '
    - -
    data.frame(species = species, family = famnames)
    - -
                species     family
    -1         Poa annua    Poaceae
    -2     Abies procera   Pinaceae
    -3 Helianthus annuus Asteraceae
    -4    Coffea arabica  Rubiaceae
    - -
    - -

    Resolve taxonomic names.

    - -

    This is a common use case for ecologists/evolutionary biologists, or at least should be. That is, species names you have for your own data, or when using other's data, could be old names - and if you need the newest names for your species list, how can you make this as painless as possible? You can query taxonomic data from many different sources with taxize.

    - -
    # The iPlantCollaborative provides access via API to their taxonomic name
    -# resolution service (TNRS)
    -mynames <- c("shorea robusta", "pandanus patina", "oryza sativa", "durio zibethinus", 
    -    "rubus ulmifolius", "asclepias curassavica", "pistacia lentiscus")
    -iplant_tnrsmatch(retrieve = "all", taxnames = c("helianthus annuus", "acacia", 
    -    "gossypium"), output = "names")
    - -
           AcceptedName   MatchFam MatchGenus MatchScore    Accept?
    -1 Helianthus annuus Asteraceae Helianthus          1 No opinion
    -2            Acacia   Fabaceae     Acacia          1 No opinion
    -3                                  Acacia          1 No opinion
    -4         Gossypium  Malvaceae  Gossypium          1 No opinion
    -     SubmittedNames
    -1 helianthus annuus
    -2            acacia
    -3            acacia
    -4         gossypium
    - -
    # The global names resolver is another attempt at this, hitting many
    -# different data sources
    -gnr_resolve(names = c("Helianthus annuus", "Homo sapiens"), returndf = TRUE)
    - -
       data_source_id    submitted_name       name_string score
    -1               4 Helianthus annuus Helianthus annuus 0.988
    -3              10 Helianthus annuus Helianthus annuus 0.988
    -5              12 Helianthus annuus Helianthus annuus 0.988
    -8             110 Helianthus annuus Helianthus annuus 0.988
    -11            159 Helianthus annuus Helianthus annuus 0.988
    -13            166 Helianthus annuus Helianthus annuus 0.988
    -15            169 Helianthus annuus Helianthus annuus 0.988
    -2               4      Homo sapiens      Homo sapiens 0.988
    -4              10      Homo sapiens      Homo sapiens 0.988
    -6              12      Homo sapiens      Homo sapiens 0.988
    -7             107      Homo sapiens      Homo sapiens 0.988
    -9             122      Homo sapiens      Homo sapiens 0.988
    -10            123      Homo sapiens      Homo sapiens 0.988
    -12            159      Homo sapiens      Homo sapiens 0.988
    -14            168      Homo sapiens      Homo sapiens 0.988
    -16            169      Homo sapiens      Homo sapiens 0.988
    -                     title
    -1                     NCBI
    -3                 Freebase
    -5                      EOL
    -8     Illinois Wildflowers
    -11                 CU*STAR
    -13                   nlbif
    -15           uBio NameBank
    -2                     NCBI
    -4                 Freebase
    -6                      EOL
    -7                AskNature
    -9                 BioPedia
    -10                   AnAge
    -12                 CU*STAR
    -14 Index to Organism Names
    -16           uBio NameBank
    - -
    # We can hit the Plantminer API too
    -plants <- c("Myrcia lingua", "Myrcia bella", "Ocotea pulchella", "Miconia", 
    -    "Coffea arabica var. amarella", "Bleh")
    -plantminer(plants)
    - -
    Myrcia lingua 
    -Myrcia bella 
    -Ocotea pulchella 
    -Miconia 
    -Coffea arabica var. amarella 
    -Bleh 
    - -
                  fam   genus                    sp             author
    -1       Myrtaceae  Myrcia                lingua   (O. Berg) Mattos
    -2       Myrtaceae  Myrcia                 bella           Cambess.
    -3       Lauraceae  Ocotea             pulchella (Nees & Mart.) Mez
    -4 Melastomataceae Miconia                    NA        Ruiz & Pav.
    -5       Rubiaceae  Coffea arabica var. amarella        A. Froehner
    -6              NA    Bleh                    NA                 NA
    -            source source.id   status confidence suggestion       database
    -1              TRO 100227036       NA         NA         NA       Tropicos
    -2             WCSP    131057 Accepted          H         NA The Plant List
    -3 WCSP (in review)    989758 Accepted          M         NA The Plant List
    -4              TRO  40018467       NA         NA         NA       Tropicos
    -5              TRO 100170231       NA         NA         NA       Tropicos
    -6               NA        NA       NA         NA       Baea             NA
    - -
    # We made a light wrapper around the Taxonstand package to search
    -# Theplantlist.org too
    -splist <- c("Heliathus annuus", "Abies procera", "Poa annua", "Platanus occidentalis", 
    -    "Carex abrupta", "Arctostaphylos canescens", "Ocimum basilicum", "Vicia faba", 
    -    "Quercus kelloggii", "Lactuca serriola")
    -tpl_search(taxon = splist)
    - -
                Genus      Species Infraspecific Plant.Name.Index
    -1       Heliathus       annuus                          FALSE
    -2           Abies      procera                           TRUE
    -3             Poa        annua                           TRUE
    -4        Platanus occidentalis                           TRUE
    -5           Carex      abrupta                           TRUE
    -6  Arctostaphylos    canescens                           TRUE
    -7          Ocimum    basilicum                           TRUE
    -8           Vicia         faba                           TRUE
    -9         Quercus    kelloggii                           TRUE
    -10        Lactuca     serriola                           TRUE
    -   Taxonomic.status      Family      New.Genus  New.Species
    -1                                    Heliathus       annuus
    -2          Accepted    Pinaceae          Abies      procera
    -3          Accepted     Poaceae            Poa        annua
    -4          Accepted Platanaceae       Platanus occidentalis
    -5          Accepted  Cyperaceae          Carex      abrupta
    -6          Accepted   Ericaceae Arctostaphylos    canescens
    -7          Accepted   Lamiaceae         Ocimum    basilicum
    -8          Accepted Leguminosae          Vicia         faba
    -9          Accepted    Fagaceae        Quercus    kelloggii
    -10         Accepted  Compositae        Lactuca     serriola
    -   New.Infraspecific Authority  Typo WFormat
    -1                              FALSE   FALSE
    -2                       Rehder FALSE   FALSE
    -3                           L. FALSE   FALSE
    -4                           L. FALSE   FALSE
    -5               <NA>     Mack. FALSE   FALSE
    -6                       Eastw. FALSE   FALSE
    -7                           L. FALSE   FALSE
    -8                           L. FALSE   FALSE
    -9               <NA>     Newb. FALSE   FALSE
    -10                          L. FALSE   FALSE
    - -
    - -

    Taxonomic hierarchies

    - -

    I often want the full taxonomic hierarchy for a set of species. That is, give me the family, order, class, etc. for my list of species. There are two different easy ways to do this with taxize. The first example uses EOL.

    - -
    - -

    Using EOL.

    - -
    pageid <- eol_search("Quercus douglasii")$id[1]  # first need to search for the taxon's page on EOL
-out <- eol_pages(taxonconceptID = pageid)  # then we need to get the taxon ID used by EOL
    -
    -# Notice that there are multiple different sources you can pull the
    -# hierarchy from. Note even that you can get the hierarchy from the ITIS
    -# service via this EOL API.
    -out
    - -
      identifier                 scientificName
    -1   46203061 Quercus douglasii Hook. & Arn.
    -2   48373995 Quercus douglasii Hook. & Arn.
    -                                  nameAccordingTo sourceIdentfier
    -1  Integrated Taxonomic Information System (ITIS)           19322
    -2 Species 2000 & ITIS Catalogue of Life: May 2012         9723391
    -  taxonRank
    -1   Species
    -2   Species
    - -
    # Then the hierarchy!
    -eol_hierarchy(out[out$nameAccordingTo == "Species 2000 & ITIS Catalogue of Life: May 2012", 
    -    "identifier"])
    - -
      sourceIdentifier  taxonID parentNameUsageID taxonConceptID
    -1         11017504 48276627                 0            281
    -2         11017505 48276628          48276627            282
    -3         11017506 48276629          48276628            283
    -4         11022500 48373354          48276629           4184
    -5         11025284 48373677          48373354           4197
    -  scientificName taxonRank
    -1        Plantae   kingdom
    -2  Magnoliophyta    phylum
    -3  Magnoliopsida     class
    -4        Fagales     order
    -5       Fagaceae    family
    - -
    eol_hierarchy(out[out$nameAccordingTo == "Integrated Taxonomic Information System (ITIS)", 
    -    "identifier"])  # and from ITIS, slightly different than ITIS output below, which includes taxa all the way down.
    - -
       sourceIdentifier  taxonID parentNameUsageID taxonConceptID
    -1            202422 46150613                 0            281
    -2            846492 46159776          46150613        8654492
    -3            846494 46161961          46159776       28818077
    -4            846496 46167532          46161961           4494
    -5            846504 46169010          46167532       28825126
    -6            846505 46169011          46169010            282
    -7             18063 46169012          46169011            283
    -8            846548 46202954          46169012       28859070
    -9             19273 46202955          46202954           4184
    -10            19275 46203022          46202955           4197
    -    scientificName     taxonRank
    -1          Plantae       kingdom
    -2   Viridaeplantae    subkingdom
    -3     Streptophyta  infrakingdom
    -4     Tracheophyta      division
    -5  Spermatophytina   subdivision
    -6     Angiospermae infradivision
    -7    Magnoliopsida         class
    -8          Rosanae    superorder
    -9          Fagales         order
    -10        Fagaceae        family
    - -
    - -

    And getting a taxonomic hierarchy using ITIS.

    - -
    # First, get the taxonomic serial number (TSN) that ITIS uses
    -mytsn <- get_tsn("Quercus douglasii", "sciname")
    - -
    Retrieving data for species ' Quercus douglasii '
    - -
    # Get the full taxonomic hierarchy for a taxon from the TSN
    -itis(mytsn, "getfullhierarchyfromtsn")
    - -
    $`1`
    -        parentName parentTsn      rankName         taxonName    tsn
    -1                                  Kingdom           Plantae 202422
    -2          Plantae    202422    Subkingdom    Viridaeplantae 846492
    -3   Viridaeplantae    846492  Infrakingdom      Streptophyta 846494
    -4     Streptophyta    846494      Division      Tracheophyta 846496
    -5     Tracheophyta    846496   Subdivision   Spermatophytina 846504
    -6  Spermatophytina    846504 Infradivision      Angiospermae 846505
    -7     Angiospermae    846505         Class     Magnoliopsida  18063
    -8    Magnoliopsida     18063    Superorder           Rosanae 846548
    -9          Rosanae    846548         Order           Fagales  19273
    -10         Fagales     19273        Family          Fagaceae  19275
    -11        Fagaceae     19275         Genus           Quercus  19276
    -12         Quercus     19276       Species Quercus douglasii  19322
    - -
    # But this can be even easier!
    -classification(get_tsn("Quercus douglasii"))  # Boom!
    - -
    Retrieving data for species ' Quercus douglasii '
    - -
    $`1`
    -        parentName parentTsn      rankName         taxonName    tsn
    -1                                  Kingdom           Plantae 202422
    -2          Plantae    202422    Subkingdom    Viridaeplantae 846492
    -3   Viridaeplantae    846492  Infrakingdom      Streptophyta 846494
    -4     Streptophyta    846494      Division      Tracheophyta 846496
    -5     Tracheophyta    846496   Subdivision   Spermatophytina 846504
    -6  Spermatophytina    846504 Infradivision      Angiospermae 846505
    -7     Angiospermae    846505         Class     Magnoliopsida  18063
    -8    Magnoliopsida     18063    Superorder           Rosanae 846548
    -9          Rosanae    846548         Order           Fagales  19273
    -10         Fagales     19273        Family          Fagaceae  19275
    -11        Fagaceae     19275         Genus           Quercus  19276
    -12         Quercus     19276       Species Quercus douglasii  19322
    - -
    # You can also do this easy-peasy route to a taxonomic hierarchy using
    -# uBio
    -classification(get_uid("Ornithorhynchus anatinus"))
    - -
    $`1`
    -       ScientificName         Rank    UID
    -1  cellular organisms      no rank 131567
    -2           Eukaryota superkingdom   2759
    -3        Opisthokonta      no rank  33154
    -4             Metazoa      kingdom  33208
    -5           Eumetazoa      no rank   6072
    -6           Bilateria      no rank  33213
    -7           Coelomata      no rank  33316
    -8       Deuterostomia      no rank  33511
    -9            Chordata       phylum   7711
    -10           Craniata    subphylum  89593
    -11         Vertebrata      no rank   7742
    -12      Gnathostomata   superclass   7776
    -13         Teleostomi      no rank 117570
    -14       Euteleostomi      no rank 117571
    -15      Sarcopterygii      no rank   8287
    -16          Tetrapoda      no rank  32523
    -17            Amniota      no rank  32524
    -18           Mammalia        class  40674
    -19        Prototheria      no rank   9254
    -20        Monotremata        order   9255
    -21  Ornithorhynchidae       family   9256
    -22    Ornithorhynchus        genus   9257
    - -
    - -

    Sequences?

    - -

While you are doing taxonomic stuff, you often wonder "hmmm, I wonder if there are any sequence data available for my species?" So, you can use get_seqs to search for specific genes for a species, and get_genes_avail to find out what genes are available for a certain species. These functions search for data on NCBI.

    - -
# Get sequences (sequence is provided in output, but hiding here for
-# brevity). What's nice about this is that it gets the longest sequence
-# available for the gene you searched for, and if there isn't anything
    -# available, it lets you get a sequence from a congener if you set
    -# getrelated=TRUE. The last column in the output data.frame also tells you
    -# what species the sequence is from.
    -out <- get_seqs(taxon_name = "Acipenser brevirostrum", gene = c("5S rRNA"), 
    -    seqrange = "1:3000", getrelated = T, writetodf = F)
    -out[, !names(out) %in% "sequence"]
    - -
                       taxon                                         gene_desc
    -1 Acipenser brevirostrum Acipenser brevirostrum 5S rRNA gene, clone BRE92A
    -     gi_no     acc_no length                 spused
    -1 60417159 AJ745069.1    121 Acipenser brevirostrum
    - -
    # Search for available sequences
    -out <- get_genes_avail(taxon_name = "Umbra limi", seqrange = "1:2000", getrelated = F)
    -out[grep("RAG1", out$genesavail, ignore.case = T), ]  # does the string 'RAG1' exist in any of the gene names
    - -
            spused length
    -414 Umbra limi    732
    -427 Umbra limi    959
    -434 Umbra limi   1631
    -                                                                            genesavail
    -414 isolate UlimA recombinase activating protein 1 (rag1) gene, exon 3 and partial cds
    -427           recombination-activating protein 1 (RAG1) gene, intron 2 and partial cds
    -434                        recombination-activating protein 1 (RAG1) gene, partial cds
    -    predicted
    -414  JX190826
    -427  AY459526
    -434  AY380548
    - -
    - -

    Get the .Rmd file used to create this post at my github account - or .md file.

    - -

    Written in Markdown, with help from knitr.

    +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).
diff --git a/_site/page26/index.html b/_site/page26/index.html
index 41d2bf1349..0ff33b737f 100644
--- a/_site/page26/index.html
+++ b/_site/page26/index.html
@@ -61,267 +61,743 @@

    Recology

    - - Altecology, a call to unconference action + + One R package for all your taxonomic needs

    - + + + UPDATE: there were some errors in the tests for `taxize`, so the binaries aren't avaiable yet. You can install from source though, see below. + + +Getting taxonomic information for the set of species you are studying can be a pain in the ass. You have to manually type, or paste in, your species one-by-one. Or, if you are lucky, there is a web service in which you can upload a list of species. Encyclopedia of Life (EOL) has a service where you can do this [here](http://gni.globalnames.org/parsers/new). But is this reproducible? No. + +Getting your taxonomic information for your species can now be done programatically in R. Do you want to get taxonomic information from ITIS. We got that. Tropicos? We got that too. uBio? No worries, we got that. What about theplantlist.org? Yep, got that. Encyclopedia of Life? Indeed. What about getting sequence data for a taxon? Oh hell yeah, you can get sequences available for a taxon across all genes, or get all records for a taxon for a specific gene. + +Of course this is all possible because these data providers have open APIs so that we can facilitate your computer talking to their database. Fun! + +Why get your taxonomic data programatically? Because it's 1) faster than by hand in web sites/looking up in books, 2) reproducible, especially if you share your code (damnit!), and 3) you can easily mash up your new taxonomic data to get sequences to build a phylogeny, etc. + +I'll give a few examples of using `taxize` based around use cases, that is, stuff someone might actually do instead of what particular functions do. + +*************** + +### Install packages. You can get from CRAN or GitHub. + +{% highlight r %} +# install.packages("ritis") # uncomment if not already installed +# install_github('taxize_', 'ropensci') # uncomment if not already installed +# install.packages("taxize", type="source") # uncomment if not already installed +library(ritis) +library(taxize) +{% endhighlight %} + + +*************** + +### Attach family names to a list of species. + +#### I often have a list of species that I studied and simply want to get their family names to, for example, make a table for the paper I'm writing. + +{% highlight r %} +# For one species +itis_name(query = "Poa annua", get = "family") +{% endhighlight %} + + + +{% highlight text %} + +Retrieving data for species ' Poa annua ' +{% endhighlight %} + + + +{% highlight text %} +[1] "Poaceae" +{% endhighlight %} + + + +{% highlight r %} + +# For many species +species <- c("Poa annua", "Abies procera", "Helianthus annuus", "Coffea arabica") +famnames <- sapply(species, itis_name, get = "family", USE.NAMES = F) +{% endhighlight %} + + + +{% highlight text %} + +Retrieving data for species ' Poa annua ' +{% endhighlight %} + + + +{% highlight text %} + +Retrieving data for species ' Abies procera ' +{% endhighlight %} + + + +{% highlight text %} + +Retrieving data for species ' Helianthus annuus ' +{% endhighlight %} + + + +{% highlight text %} + +Retrieving data for species ' Coffea arabica ' +{% endhighlight %} + + + +{% highlight r %} +data.frame(species = species, family = famnames) +{% endhighlight %} + + + +{% highlight text %} + species family +1 Poa annua Poaceae +2 Abies procera Pinaceae +3 Helianthus annuus Asteraceae +4 Coffea arabica Rubiaceae +{% endhighlight %} + + +*************** + +### Resolve taxonomic names. + +#### This is a common use case for ecologists/evolutionary biologists, or at least should be. 
That is, species names you have for your own data, or when using other's data, could be old names - and if you need the newest names for your species list, how can you make this as painless as possible? You can query taxonomic data from many different sources with `taxize`. + +{% highlight r %} +# The iPlantCollaborative provides access via API to their taxonomic name +# resolution service (TNRS) +mynames <- c("shorea robusta", "pandanus patina", "oryza sativa", "durio zibethinus", + "rubus ulmifolius", "asclepias curassavica", "pistacia lentiscus") +iplant_tnrsmatch(retrieve = "all", taxnames = c("helianthus annuus", "acacia", + "gossypium"), output = "names") +{% endhighlight %} + + + +{% highlight text %} + AcceptedName MatchFam MatchGenus MatchScore Accept? +1 Helianthus annuus Asteraceae Helianthus 1 No opinion +2 Acacia Fabaceae Acacia 1 No opinion +3 Acacia 1 No opinion +4 Gossypium Malvaceae Gossypium 1 No opinion + SubmittedNames +1 helianthus annuus +2 acacia +3 acacia +4 gossypium +{% endhighlight %} + + + +{% highlight r %} + +# The global names resolver is another attempt at this, hitting many +# different data sources +gnr_resolve(names = c("Helianthus annuus", "Homo sapiens"), returndf = TRUE) +{% endhighlight %} + + + +{% highlight text %} + data_source_id submitted_name name_string score +1 4 Helianthus annuus Helianthus annuus 0.988 +3 10 Helianthus annuus Helianthus annuus 0.988 +5 12 Helianthus annuus Helianthus annuus 0.988 +8 110 Helianthus annuus Helianthus annuus 0.988 +11 159 Helianthus annuus Helianthus annuus 0.988 +13 166 Helianthus annuus Helianthus annuus 0.988 +15 169 Helianthus annuus Helianthus annuus 0.988 +2 4 Homo sapiens Homo sapiens 0.988 +4 10 Homo sapiens Homo sapiens 0.988 +6 12 Homo sapiens Homo sapiens 0.988 +7 107 Homo sapiens Homo sapiens 0.988 +9 122 Homo sapiens Homo sapiens 0.988 +10 123 Homo sapiens Homo sapiens 0.988 +12 159 Homo sapiens Homo sapiens 0.988 +14 168 Homo sapiens Homo sapiens 0.988 +16 169 Homo sapiens Homo sapiens 0.988 + title +1 NCBI +3 Freebase +5 EOL +8 Illinois Wildflowers +11 CU*STAR +13 nlbif +15 uBio NameBank +2 NCBI +4 Freebase +6 EOL +7 AskNature +9 BioPedia +10 AnAge +12 CU*STAR +14 Index to Organism Names +16 uBio NameBank +{% endhighlight %} + + + +{% highlight r %} + +# We can hit the Plantminer API too +plants <- c("Myrcia lingua", "Myrcia bella", "Ocotea pulchella", "Miconia", + "Coffea arabica var. amarella", "Bleh") +plantminer(plants) +{% endhighlight %} + + + +{% highlight text %} +Myrcia lingua +Myrcia bella +Ocotea pulchella +Miconia +Coffea arabica var. amarella +Bleh +{% endhighlight %} + + + +{% highlight text %} + fam genus sp author +1 Myrtaceae Myrcia lingua (O. Berg) Mattos +2 Myrtaceae Myrcia bella Cambess. +3 Lauraceae Ocotea pulchella (Nees & Mart.) Mez +4 Melastomataceae Miconia NA Ruiz & Pav. +5 Rubiaceae Coffea arabica var. amarella A. 
Froehner +6 NA Bleh NA NA + source source.id status confidence suggestion database +1 TRO 100227036 NA NA NA Tropicos +2 WCSP 131057 Accepted H NA The Plant List +3 WCSP (in review) 989758 Accepted M NA The Plant List +4 TRO 40018467 NA NA NA Tropicos +5 TRO 100170231 NA NA NA Tropicos +6 NA NA NA NA Baea NA +{% endhighlight %} + + + +{% highlight r %} + +# We made a light wrapper around the Taxonstand package to search +# Theplantlist.org too +splist <- c("Heliathus annuus", "Abies procera", "Poa annua", "Platanus occidentalis", + "Carex abrupta", "Arctostaphylos canescens", "Ocimum basilicum", "Vicia faba", + "Quercus kelloggii", "Lactuca serriola") +tpl_search(taxon = splist) +{% endhighlight %} + + + +{% highlight text %} + Genus Species Infraspecific Plant.Name.Index +1 Heliathus annuus FALSE +2 Abies procera TRUE +3 Poa annua TRUE +4 Platanus occidentalis TRUE +5 Carex abrupta TRUE +6 Arctostaphylos canescens TRUE +7 Ocimum basilicum TRUE +8 Vicia faba TRUE +9 Quercus kelloggii TRUE +10 Lactuca serriola TRUE + Taxonomic.status Family New.Genus New.Species +1 Heliathus annuus +2 Accepted Pinaceae Abies procera +3 Accepted Poaceae Poa annua +4 Accepted Platanaceae Platanus occidentalis +5 Accepted Cyperaceae Carex abrupta +6 Accepted Ericaceae Arctostaphylos canescens +7 Accepted Lamiaceae Ocimum basilicum +8 Accepted Leguminosae Vicia faba +9 Accepted Fagaceae Quercus kelloggii +10 Accepted Compositae Lactuca serriola + New.Infraspecific Authority Typo WFormat +1 FALSE FALSE +2 Rehder FALSE FALSE +3 L. FALSE FALSE +4 L. FALSE FALSE +5 Mack. FALSE FALSE +6 Eastw. FALSE FALSE +7 L. FALSE FALSE +8 L. FALSE FALSE +9 Newb. FALSE FALSE +10 L. FALSE FALSE +{% endhighlight %} + + +*************** + +### Taxonomic hierarchies + +#### I often want the full taxonomic hierarchy for a set of species. That is, give me the family, order, class, etc. for my list of species. There are two different easy ways to do this with `taxize`. The first example uses EOL. + +*************** + +#### Using EOL. + +{% highlight r %} +pageid <- eol_search("Quercus douglasii")$id[1] # first need to search for the taxon's page on EOL +out <- eol_pages(taxonconceptID = pageid) # then we nee to get the taxon ID used by EOL + +# Notice that there are multiple different sources you can pull the +# hierarchy from. Note even that you can get the hierarchy from the ITIS +# service via this EOL API. +out +{% endhighlight %} + + + +{% highlight text %} + identifier scientificName +1 46203061 Quercus douglasii Hook. & Arn. +2 48373995 Quercus douglasii Hook. & Arn. + nameAccordingTo sourceIdentfier +1 Integrated Taxonomic Information System (ITIS) 19322 +2 Species 2000 & ITIS Catalogue of Life: May 2012 9723391 + taxonRank +1 Species +2 Species +{% endhighlight %} + + + +{% highlight r %} + +# Then the hierarchy! 
+eol_hierarchy(out[out$nameAccordingTo == "Species 2000 & ITIS Catalogue of Life: May 2012", + "identifier"]) +{% endhighlight %} + + + +{% highlight text %} + sourceIdentifier taxonID parentNameUsageID taxonConceptID +1 11017504 48276627 0 281 +2 11017505 48276628 48276627 282 +3 11017506 48276629 48276628 283 +4 11022500 48373354 48276629 4184 +5 11025284 48373677 48373354 4197 + scientificName taxonRank +1 Plantae kingdom +2 Magnoliophyta phylum +3 Magnoliopsida class +4 Fagales order +5 Fagaceae family +{% endhighlight %} + + + +{% highlight r %} +eol_hierarchy(out[out$nameAccordingTo == "Integrated Taxonomic Information System (ITIS)", + "identifier"]) # and from ITIS, slightly different than ITIS output below, which includes taxa all the way down. +{% endhighlight %} + -

    Note: This post is cross-posted on Sandra Chung's blog here.

    -
    +{% highlight text %} + sourceIdentifier taxonID parentNameUsageID taxonConceptID +1 202422 46150613 0 281 +2 846492 46159776 46150613 8654492 +3 846494 46161961 46159776 28818077 +4 846496 46167532 46161961 4494 +5 846504 46169010 46167532 28825126 +6 846505 46169011 46169010 282 +7 18063 46169012 46169011 283 +8 846548 46202954 46169012 28859070 +9 19273 46202955 46202954 4184 +10 19275 46203022 46202955 4197 + scientificName taxonRank +1 Plantae kingdom +2 Viridaeplantae subkingdom +3 Streptophyta infrakingdom +4 Tracheophyta division +5 Spermatophytina subdivision +6 Angiospermae infradivision +7 Magnoliopsida class +8 Rosanae superorder +9 Fagales order +10 Fagaceae family +{% endhighlight %} -

    The rise of the unconference

    -

The Ecological Society of America is holding its 98th annual meeting next year in Minneapolis, MN. Several thousand students and professionals in ecological science and education will gather to hear and read the latest work and ideas in ecology in the familiar poster and lecture formats that are the core of every major scientific conference. But a subset of these people will get a taste of something a little bit different: an unconference within the conference.

    +*************** -

    The most important difference between traditional science conferences and the unconference format is that it prioritizes human interaction. Often the best and most important parts of science meetings are the interactions between talks, next to posters, and at the end of the day over drinks. These connections pave the way for collaborations and friendships that nourish our professional and personal lives with shared opportunities, camaraderie and support.

    +#### And getting a taxonomic hierarchy using ITIS. -

    In recognition of the increasing relative importance of the “meeting” part of a science meeting, the unconference format emphasizes interaction over presentation. It attempts to engage participants to the maximum extent reasonable in discussion and doing. Science Online is a good example of this unconference format, in which the session topics are typically decided on democratically by the conference attendees (partly before arrival, partly on arrival), and you vote with your feet by going to and leaving sessions as you desire.

    +{% highlight r %} +# First, get the taxonomic serial number (TSN) that ITIS uses +mytsn <- get_tsn("Quercus douglasii", "sciname") +{% endhighlight %} -
    -

    Ecology is changing

    -

    Ecology is now adopting some of the same online and social tools that are already accelerating innovation in computing and other science disciplines. Ecologists, ecology students and educators are asking many of the same basic questions they have always asked: What should we be doing? How do we do it better and faster? Social media, open source software, open science, altmetrics, crowdsourcing, crowdfunding, data visualization, data sharing, alternative peer review, and an increasing emphasis on more and better communication and collaboration are just some of the newer tools being put forth to help address those questions in the 21st century.

    +{% highlight text %} -

    Social media is rapidly becoming more common in ecologists’ toolkits to disseminate news of their new papers, communicate about research and research tools, and even filter the deluge of publications. Tools like blogs, Twitter and Facebook are filling the communication gaps between annual meetings and adding a new layer of conversation and connection to conferences and classrooms.

    +Retrieving data for species ' Quercus douglasii ' +{% endhighlight %} -

    Social media, in turn, is connecting scientists directly to people besides their immediate colleagues who appreciate the impact of their work and want it to continue. The crowdfunding movement - exemplified by Kickstarter - has spurred similar alternative science funding projects such as SciFund. #SciFund project has shown that social media engagement increases donations to crowdfunded research (interview with Jarrett Byrnes).

    -

    In addition, we are in the era of big data, and this includes ecology. To deal with this “data deluge”, ecologists increasingly want to learn how to manage, share, retrieve, use, and visualize data. There are new tools for all of these tasks - we need to aid each other in learning them. Online and offline communities are coalescing around the development and dissemination of these tools for the benefit of ecological science, and they are meeting face-to-face at our ecological unconference.

    -

    Science in general is becoming increasingly complex and calling for larger and larger collaborations. This growth in turn is spurring a drive toward more openness and transparency within the culture of science. The more collaborative and complex scientific study becomes, the more scientists depend upon each other to do good work that we can all build upon with confidence. The often unstated assumption about ecology, and all of science, is that research findings are reproducible; but that assumption is quite shaky given the increasing number of retractions (see the Retraction Watch blog) and findings that much research is not reproducible (see media coverage here and here).

    +{% highlight r %} -

    A recent initiative seeks to facilitate attempts to reproduce research: The Reproducibility Initiative. Jarrett Byrnes spoke at #ESA2012 of how transparent, online discourse enhances our ability to discuss and improve our work and the work of our peers, both before and after publication.

    +# Get the full taxonomic hierarchy for a taxon from the TSN +itis(mytsn, "getfullhierarchyfromtsn") +{% endhighlight %} -

    Much of the ecological science community shares one or both of these goals: to do the best possible science, and to do it in a way that is most useful and accessible to colleagues and to society at large. The goal of this year’s ecological unconference is to introduce as many people as possible to resources - both tools and people - that can help all of us achieve those goals, on our own, or together.

    -

    One way we as ecologists can quickly make our research more reproducible is the way we write. By simply using tools that make reproducing what we have done easy to do, we can avoid retracted papers, failed drugs, and ruined careers.

    -
    +{% highlight text %} +$`1` + parentName parentTsn rankName taxonName tsn +1 Kingdom Plantae 202422 +2 Plantae 202422 Subkingdom Viridaeplantae 846492 +3 Viridaeplantae 846492 Infrakingdom Streptophyta 846494 +4 Streptophyta 846494 Division Tracheophyta 846496 +5 Tracheophyta 846496 Subdivision Spermatophytina 846504 +6 Spermatophytina 846504 Infradivision Angiospermae 846505 +7 Angiospermae 846505 Class Magnoliopsida 18063 +8 Magnoliopsida 18063 Superorder Rosanae 846548 +9 Rosanae 846548 Order Fagales 19273 +10 Fagales 19273 Family Fagaceae 19275 +11 Fagaceae 19275 Genus Quercus 19276 +12 Quercus 19276 Species Quercus douglasii 19322 -

    What are you proposing?

    +{% endhighlight %} -

    We originally thought about a separate event from ESA itself, modeled after Science Online, incorporating a variety of topics. However, we thought testing the waters for this sort of non-traditional unconference format would be better in 2013. We are gathering ideas from the community (see “How do I make my voice heard?” below). The ideas on the wiki that get the most traction, and have 1-2 champions that are willing to see the idea through and lead the workshop at ESA will be turned in to proposals for ESA workshops. In addition, we will be submitting a proposal for an Ignite session at ESA. To summarise, we will be running:

    -
      -
    • A few workshops (at lunch hours, and half-day). Topics may include: -
        -
      • Data sharing
      • -
      • Data visualization
      • -
      • Data management
      • -
      • Alternatives to academia
      • -
      • Blogging
      • -
      • Social media
      • -
      • Reproducible science writing
      • -
    • -
    • One Ignite session on “Tools for better/faster science”. See more about Ignite sessions here.
    • -
    • A “tweetup” event to socialize in a more relaxed atmosphere
    • -
    +{% highlight r %} -

    These will all be loosely aggregated under the #AltEcology hashtag.

    +# But this can be even easier! +classification(get_tsn("Quercus douglasii")) # Boom! +{% endhighlight %} -
    -

    How do I make my voice heard?

    -

    We have set up a wiki in which anyone can contribute. Please share your ideas and voice your support for existing ones at the wiki here. You can just throw ideas out there, or even propose new workshops and nominate people to lead them. We’re currently moving to transform existing ideas into ESA workshop and Ignite proposals to meet the November 29 deadline, but we’ll be incorporating input from the wiki right up to the meeting itself in August 2013.

    +{% highlight text %} -
    +Retrieving data for species ' Quercus douglasii ' +{% endhighlight %} -

    Get in touch

    -

    If you have any questions/comments, let us know in the comments section below, tweet us (Sandra: @sandramchung, Scott: @recology_), or email (Sandra, Scott).

    -
    +{% highlight text %} +$`1` + parentName parentTsn rankName taxonName tsn +1 Kingdom Plantae 202422 +2 Plantae 202422 Subkingdom Viridaeplantae 846492 +3 Viridaeplantae 846492 Infrakingdom Streptophyta 846494 +4 Streptophyta 846494 Division Tracheophyta 846496 +5 Tracheophyta 846496 Subdivision Spermatophytina 846504 +6 Spermatophytina 846504 Infradivision Angiospermae 846505 +7 Angiospermae 846505 Class Magnoliopsida 18063 +8 Magnoliopsida 18063 Superorder Rosanae 846548 +9 Rosanae 846548 Order Fagales 19273 +10 Fagales 19273 Family Fagaceae 19275 +11 Fagaceae 19275 Genus Quercus 19276 +12 Quercus 19276 Species Quercus douglasii 19322 -

    Get the .md file used to create this post at my github account. Written in Markdown, with help from knitr.

+{% endhighlight %} + + + +{% highlight r %} + +# You can also do this easy-peasy route to a taxonomic hierarchy using +# NCBI +classification(get_uid("Ornithorhynchus anatinus")) +{% endhighlight %} + + + +{% highlight text %} +$`1` + ScientificName Rank UID +1 cellular organisms no rank 131567 +2 Eukaryota superkingdom 2759 +3 Opisthokonta no rank 33154 +4 Metazoa kingdom 33208 +5 Eumetazoa no rank 6072 +6 Bilateria no rank 33213 +7 Coelomata no rank 33316 +8 Deuterostomia no rank 33511 +9 Chordata phylum 7711 +10 Craniata subphylum 89593 +11 Vertebrata no rank 7742 +12 Gnathostomata superclass 7776 +13 Teleostomi no rank 117570 +14 Euteleostomi no rank 117571 +15 Sarcopterygii no rank 8287 +16 Tetrapoda no rank 32523 +17 Amniota no rank 32524 +18 Mammalia class 40674 +19 Prototheria no rank 9254 +20 Monotremata order 9255 +21 Ornithorhynchidae family 9256 +22 Ornithorhynchus genus 9257 + +{% endhighlight %} + + +*************** + +### Sequences? + +#### While you are doing taxonomic stuff, you often wonder "hmmm, I wonder if there are any sequence data available for my species?" So, you can use `get_seqs` to search for specific genes for a species, and `get_genes_avail` to find out what genes are available for a certain species. These functions search for data on [NCBI](http://www.ncbi.nlm.nih.gov/). + +{% highlight r %} +# Get sequences (sequence is provided in output, but hiding here for +# brevity). What's nice about this is that it gets the longest sequence +# available for the gene you searched for, and if there isn't anything +# available, it lets you get a sequence from a congener if you set +# getrelated=TRUE. The last column in the output data.frame also tells you +# what species the sequence is from. +out <- get_seqs(taxon_name = "Acipenser brevirostrum", gene = c("5S rRNA"), + seqrange = "1:3000", getrelated = T, writetodf = F) +out[, !names(out) %in% "sequence"] +{% endhighlight %} + + + +{% highlight text %} + taxon gene_desc +1 Acipenser brevirostrum Acipenser brevirostrum 5S rRNA gene, clone BRE92A + gi_no acc_no length spused +1 60417159 AJ745069.1 121 Acipenser brevirostrum +{% endhighlight %} + + + +{% highlight r %} + +# Search for available sequences +out <- get_genes_avail(taxon_name = "Umbra limi", seqrange = "1:2000", getrelated = F) +out[grep("RAG1", out$genesavail, ignore.case = T), ] # does the string 'RAG1' exist in any of the gene names +{% endhighlight %} + + + +{% highlight text %} + spused length +414 Umbra limi 732 +427 Umbra limi 959 +434 Umbra limi 1631 + genesavail +414 isolate UlimA recombinase activating protein 1 (rag1) gene, exon 3 and partial cds +427 recombination-activating protein 1 (RAG1) gene, intron 2 and partial cds +434 recombination-activating protein 1 (RAG1) gene, partial cds + predicted +414 JX190826 +427 AY459526 +434 AY380548 +{% endhighlight %} + + + +********* +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-12-06-taxize.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-12-06-taxize.md). + +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).

    - - Displaying Your Data in Google Earth Using R2G2 + + Altecology, a call to unconference action

    - + -

    Have you ever wanted to easily visualize your ecology data in Google Earth? R2G2 is a new package for R, available via R CRAN and formally described in this Molecular Ecology Resources article, which provides a user-friendly bridge between R and the Google Earth interface. Here, we will provide a brief introduction to the package, including a short tutorial, and then encourage you to try it out with your own data!

    + Note: This post is cross-posted on Sandra Chung's blog [here](http://sandrachung.com/). -

    Nils Arrigo, with some help from Loren Albert, Mike Barker, and Pascal Mickelson (one of the contributors to Recology), has created a set of R tools to generate KML files to view data with geographic components. Instead of just telling you what the tools can do, though, we will show you a couple of examples using publically available data. Note: a number of individual files are linked to throughout the tutorial below, but just in case you would rather download all the tutorial files in one go, have at it (tutorial zip file).

    +********* -

    Among the basic tools in R2G2 is the ability to place features—like dots, shapes, or images (including plots you produced in R)— that represent discrete observations at specific geographical locations. For example, in the figure below, we show the migratory path of a particular turkey vulture in autumn of three successive years (red = 2009; blue = 2010; green = 2011).

    +### The rise of the unconference +The Ecological Society of America meeting is holding its [98th annual meeting](http://www.esa.org/minneapolis/) next year in Minneapolis, MN. Several thousand students and professionals in ecological science and education will gather to hear and read the latest work and ideas in ecology in the familiar poster and lecture formats that are the core of every major scientific conference. But a subset of these people will get a taste of something a little bit different: an unconference within the conference. -
    -Google Earth image with three successive years of a particular turkey vulture's migration
    -Google Earth imagery showing migratory path of a particular turkey vulture in 2009, 2010, and 2011. -
    +The most important difference between traditional science conferences and the unconference format is that it prioritizes human interaction. Often the best and most important parts of science meetings are the interactions between talks, next to posters, and at the end of the day over drinks. These connections pave the way for collaborations and friendships that nourish our professional and personal lives with shared opportunities, camaraderie and support. -

    We use the PolyLines2GE function that is part of R2G2 to create line segments between the geographical coordinates which have been obtained from a turkey vulture tagged with a transponder (data accessed via the Movebank Data Repository and is from the Turkey Vulture Acopian Center USA GPS). The PolyLines2GE function looks like the following:

    - -
    PolyLines2GE(coords = vulture_path[,2:3],  
    -            nesting = vulture_path[,1],  
    -            colors = "auto",  
    -            goo = "Vulture_Path.kml",  
    -            maxAlt = 1e4,  
    -            fill = FALSE,  
    -            closepoly = FALSE,  
    -            lwd = 2,  
    -            extrude = 0)
    - +In recognition of the increasing relative importance of the “meeting” part of a science meeting, the unconference format emphasizes interaction over presentation. It attempts to engage participants to the maximum extent reasonable in discussion and doing. [Science Online](http://scienceonline.com/) is a good example of this unconference format, in which the session topics are typically decided on democratically by the conference attendees (partly before arrival, partly on arrival), and you vote with your feet by going to and leaving sessions as you desire. -

    It expects to receive an array ("coords") containing latitude and longitude coordinates in decimal degrees. Additionally, each individual coordinate has a flag associated with it ("nesting") so that each data series can be distinguished. Illustrating what you need is easier than explaining:

    +********* -
    nesting longitude latitude
    -1   long1A      lat1A
    -1   long1B      lat1B
    -1   long1C      lat1C
    -2   long2A      lat2A
    -2   long2B      lat2B
    -3   long3A      lat3A
    -3   long3B      lat3B
    -3   long3C      lat3C
    +### Ecology is changing +Ecology is now adopting some of the same online and social tools that are already accelerating innovation in computing and other science disciplines. Ecologists, ecology students and educators are asking many of the same basic questions they have always asked: What should we be doing? How do we do it better and faster? Social media, open source software, open science, altmetrics, crowdsourcing, crowdfunding, data visualization, data sharing, alternative peer review, and an increasing emphasis on more and better communication and collaboration are just some of the newer tools being put forth to help address those questions in the 21st century. -

    Feeding the columns of this array to the function results in three differently colored lines: the first would connect the coordinates 1A-1B-1C, while the second would connect 2A-2B, and the third would connect 3A-3B-3C. The only other user-defined input that is strictly necessary is the output file name ("Vulture_Path.kml" in this case). The other options—which allow you control of the appearance of the lines and of the altitude at which your line displays in Google Earth—have reasonable defaults that are well-documented in the function definition itself. Check out this example in Google Earth by downloading the KML file. Alternatively, download the annotated R script and generate the KML file for yourself.

    +Social media is rapidly becoming more common in ecologists’ toolkits to disseminate news of their new papers, communicate about research and research tools, and even filter the deluge of publications. Tools like blogs, Twitter and Facebook are filling the communication gaps between annual meetings and adding a new layer of conversation and connection to conferences and classrooms. -

    Now, let's say you wanted to get a sense of the range and abundance of two congeneric species. In this second example, we use the Hist2GE function to create a histogram—overlaid on the surface of the earth—which shows the species distribution of Mimulus lewisii (red) and Mimulus nasutus (blue) in North America.

    +Social media, in turn, is connecting scientists directly to people besides their immediate colleagues who appreciate the impact of their work and want it to continue. The crowdfunding movement - exemplified by Kickstarter - has spurred similar alternative science funding projects such as SciFund. #SciFund project has shown that social media engagement increases donations to crowdfunded research ([interview with Jarrett Byrnes](http://jecology.libsyn.com/interview-with-jarrett-byrnes-on-science-crowdfunding)). -
    -Google Earth image showing the distribution of Mimulus in North America
    -Google Earth imagery showing the species distribution of Mimulus lewisii and Mimulus nasutus -
    +In addition, we are in the era of big data, and [this includes ecology](http://www.neoninc.org/news/big-data-part-i). To deal with this “data deluge”, ecologists increasingly want to learn how to manage, share, retrieve, use, and visualize data. There are new tools for all of these tasks - we need to aid each other in learning them. Online and offline communities are coalescing around the development and dissemination of these tools for the benefit of ecological science, and they are meeting face-to-face at our ecological unconference. -

    As you might expect, each polygon represents an occurrence of the species in question, while the height of the polygon represents the abundance of the species at that geographic location. Species occurring within a particular distance of each other have been grouped together for the histogram. For this example, we retrieve data from the GBIF database from within R (see the example code for how that is done). Inputs to the Hist2GE function are:

    +Science in general is becoming increasingly complex and calling for larger and larger collaborations. This growth in turn is spurring a drive toward more openness and transparency within the culture of science. The more collaborative and complex scientific study becomes, the more scientists depend upon each other to do good work that we can all build upon with confidence. The often unstated assumption about ecology, and all of science, is that research findings are reproducible; but that assumption is quite shaky given the increasing number of retractions (see the [Retraction Watch blog](http://retractionwatch.wordpress.com/)) and findings that much research is not reproducible (see media coverage [here](http://www.nature.com/nature/journal/v483/n7391/full/483531a.html) and [here](http://www.reuters.com/article/2012/03/28/us-science-cancer-idUSBRE82R12P20120328)). -
    Hist2GE(coords = MyCompleteData[, 8:7],  
    -        species = MyCompleteData[, 1],  
    -        grid = grid10000,  
    -        goo = "Mimulus",  
    -        nedges = 6,  
    -        orient = 45,  
    -        maxAlt = 1e4)
    +A recent initiative seeks to facilitate attempts to reproduce research: [The Reproducibility Initiative](https://www.scienceexchange.com/reproducibility). Jarrett Byrnes [spoke at #ESA2012](http://www.slideshare.net/JarrettByrnes/taking-the-ecological-conversation-online) of how transparent, online discourse enhances our ability to discuss and improve our work and the work of our peers, both before and after publication. -

    As in the first example, the function expects to receive an array containing the longitude and latitude ("coords"), a vector distinguishing individual observations ("species"), and an output file name ("goo"). In this case, however, we also need to specify the size of the grid we will use to group observations together to construct the histogram. Several pre-defined grid sizes are included in the package to do this grouping; these all cover large geographic areas and therefore must account for the curvature of the earth. Here is a list of these pre-defined grids:

    +Much of the ecological science community shares one or both of these goals: to do the best possible science, and to do it in a way that is most useful and accessible to colleagues and to society at large. The goal of this year’s ecological unconference is to introduce as many people as possible to resources - both tools and people - that can help all of us achieve those goals, on our own, or together. - - - - - - - -
    Grid NameApproximate Area of Grid Division
    grid2000025,500 sq. km
    grid1000051,000 sq. km
    grid5000102,000 sq. km
    grid5001,020,000 sq. km
    grid5010,200,000 sq. km
+One way we as ecologists can quickly make our research more reproducible is the way we write. By simply using tools that make reproducing what we have done easy to do, we can avoid retracted papers, failed drugs, and ruined careers. + +********* + +### What are you proposing? +We originally thought about a separate event from ESA itself, modeled after [Science Online](http://scienceonline.com/), incorporating a variety of topics. However, we thought testing the waters for this sort of non-traditional unconference format would be better in 2013. We are gathering ideas from the community (see “How do I make my voice heard?” below). The ideas on the wiki that get the most traction, and have 1-2 champions that are willing to see the idea through and lead the workshop at ESA will be turned into proposals for ESA workshops. In addition, we will be submitting a proposal for an Ignite session at ESA. To summarise, we will be running:

    For smaller geographic areas (less than 25,000 square kilometers, or an area of about 158 km per side), you can customize the grid size by specifying the bounds of your region of interest in decimal degrees, as well as the coarseness of the grid within that region. While it is possible to use this custom grid definition for larger sizes, beware that not all areas defined thusly will be of equal size due to the earth's curvature (obviously the bigger you go, the worse it gets...). Finally, you again have control over the display parameters of the histogram. In particular, the maximum altitude ("maxAlt") controls how high the tallest bar in the histogram will go. Here is the resulting KML file, as well as the annotated R script so you can further explore the example.

    ++ A few workshops (at lunch hours, and half-day). Topics may include: + + Data sharing + + Data visualization + + Data management + + Alternatives to academia + + Blogging + + Social media + + Reproducible science writing ++ One Ignite session on “Tools for better/faster science”. See more about Ignite sessions [here](http://www.esa.org/minneapolis/ignite.php). ++ A “tweetup” event to socialize in a more relaxed atmosphere -

    More complex visual representations are also possible using R2G2. For instance, you can also create contour plots or phylogenies overlaid directly on the surface of the earth. We included a couple examples of this type in our Molecular Ecology Resources article, and if the response seems good, we may post a follow up tutorial showing how we went about creating those examples.

    +These will all be loosely aggregated under the [#AltEcology](https://twitter.com/search/realtime?q=%23AltEcology) hashtag. -

    It is our sincere hope that you will use the tools in R2G2 to more effectively visualize the geographical aspects of your data. In particular, we are excited about the potential for incorporating R2G2 into data analysis pipelines connecting analysis in R with data visualization and exploration in Google Earth. Ultimately, the inclusion of KML files as supplementary materials to journal articles should also enrich one's understanding of the data being presented!

    +********* -

    Note: If you make something cool using R2G2, please post a link to your KML file in the comments; we would love to see!

    +### How do I make my voice heard? +We have set up a wiki in which anyone can contribute. Please share your ideas and voice your support for existing ones at the wiki [here](http://ecologyunconference.wikispaces.com/). You can just throw ideas out there, or even propose new workshops and nominate people to lead them. We’re currently moving to transform existing ideas into ESA workshop and Ignite proposals to meet the [November 29 deadline](http://www.esa.org/minneapolis/workshop.php), but we’ll be incorporating input from the wiki right up to the meeting itself in August 2013. -

    Citation information for R2G2:
    -Arrigo, N., Albert, L. P., Mickelson, P. G. and Barker, M. S. (2012), Quantitative visualization of biological data in Google Earth using R2G2, an R CRAN package. Molecular Ecology Resources. doi: 10.1111/1755-0998.12012

    +********* + +### Get in touch +If you have any questions/comments, let us know in the comments section below, tweet us (Sandra: [@sandramchung](https://twitter.com/sandramchung), Scott: [@recology_](https://twitter.com/recology_)), or email ([Sandra](mailto:sandra.m.chung@gmail.com), [Scott](mailto:myrmecocystus@gmail.com)). + +********* +#### Get the .md file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-11-15-altecology.md). Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).

    - - Getting taxonomic names downstream + + Displaying Your Data in Google Earth Using R2G2

    - - -

    It can be a pain in the ass to get taxonomic names. For example, I sometimes need to get all the Class names for a set of species. This is a relatively easy problem using the ITIS API (example below).

    + -

    The much harder problem is getting all the taxonomic names downstream. ITIS doesn't provide an API method for this - well, they do (getHirerachyDownFromTSN), but it only provides direct children (e.g., the genera within a tribe - but it won't give all the species within each genus).

    + Have you ever wanted to easily visualize your ecology data in [Google Earth][googleearth]? [R2G2][r2g2] is a new package for R, available via [R CRAN][rcran] and formally described in [this Molecular Ecology Resources article][MERarticle], which provides a user-friendly bridge between R and the Google Earth interface. Here, we will provide a brief introduction to the package, including a short tutorial, and then encourage you to try it out with your own data! -

    So in the taxize package, we wrote a function called downstream, which allows you to get taxonomic names to any downstream point, e.g.:

+[Nils Arrigo][nils], with some help from [Loren Albert][loren], [Mike Barker][mike], and Pascal Mickelson (one of the contributors to [Recology][recology]), has created a set of R tools to generate KML files to view data with geographic components. Instead of just telling you what the tools can do, though, we will show you a couple of examples using publicly available data. Note: a number of individual files are linked to throughout the tutorial below, but just in case you would rather download all the tutorial files in one go, have at it ([tutorial zip file][tutorialfile]).
      -
    • get all Classes within Animalia,
    • -
    • get all Species within a Family
    • -
    • etc.
    • -
    +Among the basic tools in [R2G2][r2g2] is the ability to place features—like dots, shapes, or images (including plots you produced in R)— that represent discrete observations at specific geographical locations. For example, in the figure below, we show the migratory path of a particular turkey vulture in autumn of three successive years (red = 2009; blue = 2010; green = 2011). -

    Install packages. You can get other packages from CRAN, but taxize is only on GitHub for now.

    -
    1 # install_github('ritis', 'ropensci') # uncomment if not already installed
    -2 # install_github('taxize_', 'ropensci') # uncomment if not already
    -3 # installed
    -4 library(ritis)
    -5 library(taxize)
    +
    +Google Earth image with three successive years of a particular turkey vulture's migration
    +Google Earth imagery showing migratory path of a particular turkey vulture in 2009, 2010, and 2011. +
    -

    Get upstream taxonomic names.

    -
    1 # Search for a TSN by scientific name
    -2 df <- searchbyscientificname("Tardigrada")
    -3 tsn <- df[df$combinedname %in% "Tardigrada", "tsn"]
    -4 
    -5 # Get just one immediate higher taxonomic name
    -6 gethierarchyupfromtsn(tsn = tsn)
    +We use the *PolyLines2GE* function that is part of [R2G2][r2g2] to create line segments between the geographical coordinates which have been obtained from a turkey vulture tagged with a transponder (data accessed via the [Movebank Data Repository][movebank] and is from the [Turkey Vulture Acopian Center USA GPS][turkeyvulturestudy]). The *PolyLines2GE* function looks like the following: + +{% highlight r %} +PolyLines2GE(coords = vulture_path[,2:3], + nesting = vulture_path[,1], + colors = "auto", + goo = "Vulture_Path.kml", + maxAlt = 1e4, + fill = FALSE, + closepoly = FALSE, + lwd = 2, + extrude = 0) +{% endhighlight %} + +It expects to receive an array ("coords") containing latitude and longitude coordinates in decimal degrees. Additionally, each individual coordinate has a flag associated with it ("nesting") so that each data series can be distinguished. Illustrating what you need is easier than explaining: +{% highlight text %} +nesting longitude latitude +1 long1A lat1A +1 long1B lat1B +1 long1C lat1C +2 long2A lat2A +2 long2B lat2B +3 long3A lat3A +3 long3B lat3B +3 long3C lat3C +{% endhighlight %} +Feeding the columns of this array to the function results in three differently colored lines: the first would connect the coordinates 1A-1B-1C, while the second would connect 2A-2B, and the third would connect 3A-3B-3C. The only other user-defined input that is strictly necessary is the output file name ("Vulture_Path.kml" in this case). The other options—which allow you control of the appearance of the lines and of the altitude at which your line displays in Google Earth—have reasonable defaults that are well-documented in the function definition itself. Check out this example in Google Earth [by downloading the KML file][vultureKML]. Alternatively, [download the annotated R script][vultureR] and generate the KML file for yourself. + +Now, let's say you wanted to get a sense of the range and abundance of two congeneric species. In this second example, we use the *Hist2GE* function to create a histogram—overlaid on the surface of the earth—which shows the species distribution of *Mimulus lewisii* (red) and *Mimulus nasutus* (blue) in North America. -
      parentName parentTsn rankName  taxonName    tsn
    -1   Animalia    202423   Phylum Tardigrada 155166
    -
    1 # Get full hierarchy upstream from TSN
    -2 getfullhierarchyfromtsn(tsn = tsn)
    +
    +Google Earth image showing the distribution of Mimulus in North America
    +Google Earth imagery showing the species distribution of Mimulus lewisii and Mimulus nasutus +
    -
      parentName parentTsn rankName        taxonName    tsn
    -1                       Kingdom         Animalia 202423
    -2   Animalia    202423   Phylum       Tardigrada 155166
    -3 Tardigrada    155166    Class     Eutardigrada 155362
    -4 Tardigrada    155166    Class Heterotardigrada 155167
    -5 Tardigrada    155166    Class   Mesotardigrada 155358
    -

    Get taxonomc names downstream.

    +As you might expect, each polygon represents an occurrence of the species in question, while the height of the polygon represents the abundance of the species at that geographic location. Species occurring within a particular distance of each other have been grouped together for the histogram. For this example, we retrieve data from the [GBIF][gbif] database from within R (see the example code for how that is done). Inputs to the Hist2GE function are: +{% highlight r %} +Hist2GE(coords = MyCompleteData[, 8:7], + species = MyCompleteData[, 1], + grid = grid10000, + goo = "Mimulus", + nedges = 6, + orient = 45, + maxAlt = 1e4) +{% endhighlight %} +As in the first example, the function expects to receive an array containing the longitude and latitude ("coords"), a vector distinguishing individual observations ("species"), and an output file name ("goo"). In this case, however, we also need to specify the size of the grid we will use to group observations together to construct the histogram. Several pre-defined grid sizes are included in the package to do this grouping; these all cover large geographic areas and therefore must account for the curvature of the earth. Here is a list of these pre-defined grids: -
    1 # Get genera downstream fromthe Class Bangiophyceae
    -2 downstream(846509, "Genus")
    + + + + + + + +
    Grid NameApproximate Area of Grid Division
    grid2000025,500 sq. km
    grid1000051,000 sq. km
    grid5000102,000 sq. km
    grid5001,020,000 sq. km
    grid5010,200,000 sq. km
    -
        tsn parentName parentTsn   taxonName rankId rankName
    -1 11531 Bangiaceae     11530      Bangia    180    Genus
    -2 11540 Bangiaceae     11530    Porphyra    180    Genus
    -3 11577 Bangiaceae     11530 Porphyrella    180    Genus
    -4 11580 Bangiaceae     11530 Conchocelis    180    Genus
    +For smaller geographic areas (less than 25,000 square kilometers, or an area of about 158 km per side), you can customize the grid size by specifying the bounds of your region of interest in decimal degrees, as well as the coarseness of the grid within that region. While it is possible to use this custom grid definition for larger sizes, beware that not all areas defined thusly will be of equal size due to the earth's curvature (obviously the bigger you go, the worse it gets...). Finally, you again have control over the display parameters of the histogram. In particular, the maximum altitude ("maxAlt") controls how high the tallest bar in the histogram will go. Here is [the resulting KML file][mimulusKML], as well as [the annotated R script][mimulusR] so you can further explore the example. -
    1 # Get families downstream from Acridoidea
    -2 downstream(650497, "Family")
    +More complex visual representations are also possible using [R2G2][r2g2]. For instance, you can also create contour plots or phylogenies overlaid directly on the surface of the earth. We included a couple examples of this type in [our Molecular Ecology Resources article][MERarticle], and if the response seems good, we may post a follow up tutorial showing how we went about creating those examples. -
          tsn parentName parentTsn      taxonName rankId rankName
    -1  102195 Acridoidea    650497      Acrididae    140   Family
    -2  650502 Acridoidea    650497     Romaleidae    140   Family
    -3  657472 Acridoidea    650497    Charilaidae    140   Family
    -4  657473 Acridoidea    650497   Lathiceridae    140   Family
    -5  657474 Acridoidea    650497     Lentulidae    140   Family
    -6  657475 Acridoidea    650497    Lithidiidae    140   Family
    -7  657476 Acridoidea    650497   Ommexechidae    140   Family
    -8  657477 Acridoidea    650497    Pamphagidae    140   Family
    -9  657478 Acridoidea    650497  Pyrgacrididae    140   Family
    -10 657479 Acridoidea    650497    Tristiridae    140   Family
    -11 657492 Acridoidea    650497 Dericorythidae    140   Family
    +It is our sincere hope that you will use the tools in [R2G2][r2g2] to more effectively visualize the geographical aspects of your data. In particular, we are excited about the potential for incorporating [R2G2][r2g2] into data analysis pipelines connecting analysis in R with data visualization and exploration in Google Earth. Ultimately, the inclusion of KML files as supplementary materials to journal articles should also enrich one's understanding of the data being presented! -
    1 # Get species downstream from Ursus
    -2 downstream(180541, "Species")
    -
         tsn parentName parentTsn        taxonName rankId rankName
    -1 180542      Ursus    180541  Ursus maritimus    220  Species
    -2 180543      Ursus    180541     Ursus arctos    220  Species
    -3 180544      Ursus    180541 Ursus americanus    220  Species
    -4 621850      Ursus    180541 Ursus thibetanus    220  Species
    +Note: If you make something cool using R2G2, please post a link to your KML file in the comments; we would love to see! -
    -

    Get the .Rmd file used to create this post at my github account - or .md file.

    +Citation information for [R2G2][r2g2]: +*Arrigo, N., Albert, L. P., Mickelson, P. G. and Barker, M. S. (2012), Quantitative visualization of biological data in Google Earth using R2G2, an R CRAN package. Molecular Ecology Resources. doi: 10.1111/1755-0998.12012* -

    Written in Markdown, with help from knitr.

    +[googleearth]: http://earth.google.com +[rcran]: http://cran.r-project.org/ +[MERarticle]: http://onlinelibrary.wiley.com/doi/10.1111/1755-0998.12012/abstract +[nils]: http://barkerlab.net/nils.html +[loren]: http://portal.environment.arizona.edu/students/profiles/loren-albert +[mike]: http://barkerlab.net/mike.html +[recology]: http://sckott.github.io/about.html +[gbif]: http://data.gbif.org/ +[r2g2]: http://cran.r-project.org/web/packages/R2G2/index.html +[vultureKML]: /public/img/R2G2tutorial/Vulture_Path.kml +[vultureR]: /public/img/R2G2tutorial/Vulture_Path.R +[movebank]: http://movebank.org/ +[turkeyvulturestudy]: http://movebank.org/movebank/#page%3Dstudies%2Cpath%3Dstudy481458 +[mimulusKML]: /public/img/R2G2tutorial/Mimulus_Distribution.kml +[mimulusR]: /public/img/R2G2tutorial/Mimulus_Distribution.R +[tutorialfile]: /public/img/R2G2tutorial/R2G2tutorial.zip
    diff --git a/_site/page27/index.html b/_site/page27/index.html index 42910ec870..785ae2d791 100644 --- a/_site/page27/index.html +++ b/_site/page27/index.html @@ -59,6 +59,142 @@

    Recology

      +
    +

    + + Getting taxonomic names downstream + +

+ + + + It can be a pain in the ass to get taxonomic names. For example, I sometimes need to get all the Class names for a set of species. This is a relatively easy problem using the [ITIS API](http://www.itis.gov/ws_description.html) (example below). + +The much harder problem is getting all the taxonomic names downstream. ITIS doesn't provide an API method for this - well, they do ([`getHierarchyDownFromTSN`](http://www.itis.gov/ws_hierApiDescription.html#getHierarchyDn)), but it only provides direct children (e.g., the genera within a tribe - but it won't give all the species within each genus). + +So in the `taxize` package, we wrote a function called `downstream`, which allows you to get taxonomic names to any downstream point, e.g.: + ++ get all Classes within Animalia, ++ get all Species within a Family ++ etc. + +### Install packages. You can get other packages from CRAN, but taxize is only on GitHub for now. + +{% highlight r linenos %} +# install_github('ritis', 'ropensci') # uncomment if not already installed +# install_github('taxize_', 'ropensci') # uncomment if not already +# installed +library(ritis) +library(taxize) +{% endhighlight %} + + +### Get upstream taxonomic names. + +{% highlight r linenos %} +# Search for a TSN by scientific name +df <- searchbyscientificname("Tardigrada") +tsn <- df[df$combinedname %in% "Tardigrada", "tsn"] + +# Get just one immediate higher taxonomic name +gethierarchyupfromtsn(tsn = tsn) +{% endhighlight %} + + + +{% highlight text %} + parentName parentTsn rankName taxonName tsn +1 Animalia 202423 Phylum Tardigrada 155166 +{% endhighlight %} + + + +{% highlight r linenos %} + +# Get full hierarchy upstream from TSN +getfullhierarchyfromtsn(tsn = tsn) +{% endhighlight %} + + + +{% highlight text %} + parentName parentTsn rankName taxonName tsn +1 Kingdom Animalia 202423 +2 Animalia 202423 Phylum Tardigrada 155166 +3 Tardigrada 155166 Class Eutardigrada 155362 +4 Tardigrada 155166 Class Heterotardigrada 155167 +5 Tardigrada 155166 Class Mesotardigrada 155358 +{% endhighlight %} + + +### Get taxonomic names downstream.
+ +{% highlight r linenos %} +# Get genera downstream from the Class Bangiophyceae +downstream(846509, "Genus") +{% endhighlight %} + + + +{% highlight text %} + tsn parentName parentTsn taxonName rankId rankName +1 11531 Bangiaceae 11530 Bangia 180 Genus +2 11540 Bangiaceae 11530 Porphyra 180 Genus +3 11577 Bangiaceae 11530 Porphyrella 180 Genus +4 11580 Bangiaceae 11530 Conchocelis 180 Genus +{% endhighlight %} + + + +{% highlight r linenos %} + +# Get families downstream from Acridoidea +downstream(650497, "Family") +{% endhighlight %} + + + +{% highlight text %} + tsn parentName parentTsn taxonName rankId rankName +1 102195 Acridoidea 650497 Acrididae 140 Family +2 650502 Acridoidea 650497 Romaleidae 140 Family +3 657472 Acridoidea 650497 Charilaidae 140 Family +4 657473 Acridoidea 650497 Lathiceridae 140 Family +5 657474 Acridoidea 650497 Lentulidae 140 Family +6 657475 Acridoidea 650497 Lithidiidae 140 Family +7 657476 Acridoidea 650497 Ommexechidae 140 Family +8 657477 Acridoidea 650497 Pamphagidae 140 Family +9 657478 Acridoidea 650497 Pyrgacrididae 140 Family +10 657479 Acridoidea 650497 Tristiridae 140 Family +11 657492 Acridoidea 650497 Dericorythidae 140 Family +{% endhighlight %} + + + +{% highlight r linenos %} + +# Get species downstream from Ursus +downstream(180541, "Species") +{% endhighlight %} + + + +{% highlight text %} + tsn parentName parentTsn taxonName rankId rankName +1 180542 Ursus 180541 Ursus maritimus 220 Species +2 180543 Ursus 180541 Ursus arctos 220 Species +3 180544 Ursus 180541 Ursus americanus 220 Species +4 621850 Ursus 180541 Ursus thibetanus 220 Species +{% endhighlight %} + + +********* +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-10-16-get-taxa-downstream.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-10-16-get-taxa-downstream.md). + +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/). + +
    +

    @@ -68,134 +204,163 @@

    -

    I need to simulate balanced and unbalanced phylogenetic trees for some research I am doing. In order to do this, I do rejection sampling: simulate a tree -> measure tree shape -> reject if not balanced or unbalanced enough. But what is enough? We need to define some cutoff value to determine what will be our set of balanced and unbalanced trees.

    - -

    A function to calculate shape metrics, and a custom theme for plottingn phylogenies.

    - -
     1 foo <- function(x, metric = "colless") {
    - 2     if (metric == "colless") {
    - 3         xx <- as.treeshape(x)  # convert to apTreeshape format
    - 4         colless(xx, "yule")  # calculate colless' metric
    - 5     } else if (metric == "gamma") {
    - 6         gammaStat(x)
    - 7     } else stop("metric should be one of colless or gamma")
    - 8 }
    - 9 
    -10 theme_myblank <- function() {
    -11     stopifnot(require(ggplot2))
    -12     theme_blank <- ggplot2::theme_blank
    -13     ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
    -14         panel.background = element_blank(), plot.background = element_blank(), 
    -15         axis.title.x = element_text(colour = NA), axis.title.y = element_blank(), 
    -16         axis.text.x = element_blank(), axis.text.y = element_blank(), axis.line = element_blank(), 
    -17         axis.ticks = element_blank())
    -18 }
    - -

    Simulate some trees

    - -
    1 library(ape)
    -2 library(phytools)
    -3 
    -4 numtrees <- 1000  # lets simulate 1000 trees
    -5 trees <- pbtree(n = 50, nsim = numtrees, ape = F)  # simulate 500 pure-birth trees with 100 spp each, ape = F makes it run faster
    - -

    Calculate Colless' shape metric on each tree

    - -
    1 library(plyr)
    -2 library(apTreeshape)
    -3 
    -4 colless_df <- ldply(trees, foo, metric = "colless")  # calculate metric for each tree
    -5 head(colless_df)
    - -
           V1
    +    I need to simulate balanced and unbalanced phylogenetic trees for some research I am doing.  In order to do this, I do rejection sampling: simulate a tree -> measure tree shape -> reject if not balanced or unbalanced __enough__.  But what is enough?  We need to define some cutoff value to determine what will be our set of balanced and unbalanced trees. 
    +
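+To make the rejection-sampling idea concrete, here is a minimal sketch (the 0.7 cutoff below is just a placeholder; the rest of this post is about picking a sensible value):
+
+{% highlight r linenos %}
+# minimal sketch of rejection sampling on tree shape: simulate a tree,
+# measure its shape, and reject until one passes the (placeholder) cutoff
+library(phytools)
+library(apTreeshape)
+
+sample_unbalanced <- function(cutoff = 0.7) {
+    repeat {
+        tr <- pbtree(n = 50)  # simulate a pure-birth tree
+        if (colless(as.treeshape(tr), "yule") > cutoff) return(tr)  # keep only trees that are unbalanced enough
+    }
+}
+{% endhighlight %}
+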
+### A function to calculate shape metrics, and a custom theme for plotting phylogenies.
    +
    +{% highlight r linenos %}
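+# compute a tree shape statistic for a single tree: Colless' index (imbalance) or the gamma statistic (distribution of nodes in time)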
    +foo <- function(x, metric = "colless") {
    +    if (metric == "colless") {
    +        xx <- as.treeshape(x)  # convert to apTreeshape format
    +        colless(xx, "yule")  # calculate colless' metric
    +    } else if (metric == "gamma") {
    +        gammaStat(x)
    +    } else stop("metric should be one of colless or gamma")
    +}
    +
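+# a fully blank ggplot2 theme, used below to draw the inset phylogenies with no axes, gridlines, or background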
    +theme_myblank <- function() {
    +    stopifnot(require(ggplot2))
    +    theme_blank <- ggplot2::theme_blank
    +    ggplot2::theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
    +        panel.background = element_blank(), plot.background = element_blank(), 
    +        axis.title.x = element_text(colour = NA), axis.title.y = element_blank(), 
    +        axis.text.x = element_blank(), axis.text.y = element_blank(), axis.line = element_blank(), 
    +        axis.ticks = element_blank())
    +}
    +{% endhighlight %}
    +
    +
    +### Simulate some trees
    +
    +{% highlight r linenos %}
    +library(ape)
    +library(phytools)
    +
+numtrees <- 1000  # let's simulate 1000 trees
+trees <- pbtree(n = 50, nsim = numtrees, ape = F)  # simulate 1000 pure-birth trees with 50 spp each, ape = F makes it run faster
    +{% endhighlight %}
    +
    +
    +### Calculate Colless' shape metric on each tree
    +
    +{% highlight r linenos %}
    +library(plyr)
    +library(apTreeshape)
    +
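+# ldply applies foo() to each simulated tree and row-binds the results into a one-column data.frame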
    +colless_df <- ldply(trees, foo, metric = "colless")  # calculate metric for each tree
    +head(colless_df)
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +       V1
     1 -0.1761
     2  0.2839
     3  0.4639
     4  0.9439
     5 -0.6961
    -6 -0.1161
    - -
    1 # Calculate the percent of trees that will fall into the cutoff for balanced and unbalanced trees
    -2 col_percent_low <- round(length(colless_df[colless_df$V1 < -0.7, "V1"])/numtrees, 2) * 100
    -3 col_percent_high <- round(length(colless_df[colless_df$V1 > 0.7, "V1"])/numtrees, 2) * 100
    - -

    Create a distribution of the metric values

    - -
     1 library(ggplot2)
    - 2 
    - 3 a <- ggplot(colless_df, aes(V1)) +  # plot histogram of distribution of values
    - 4  geom_histogram() + 
    - 5  theme_bw(base_size=18) + 
    - 6  scale_x_continuous(limits=c(-3,3), breaks=c(-3,-2,-1,0,1,2,3)) + 
    - 7  geom_vline(xintercept = -0.7, colour="red", linetype = "longdash") +
    - 8  geom_vline(xintercept = 0.7, colour="red", linetype = "longdash") +
    - 9  ggtitle(paste0("Distribution of Colless' metric for 1000 trees, cutoffs at -0.7 and 0.7 results in\n ", col_percent_low, "% (", numtrees*(col_percent_low/100), ") 'balanced' trees (left) and ", col_percent_low, "% (", numtrees*(col_percent_low/100), ") 'unbalanced' trees (right)")) +  
    -10  labs(x = "Colless' Metric Value", y = "Number of phylogenetic trees") +
    -11  theme(plot.title  = element_text(size = 16))
    -12 
    -13 a
    - -

    center

    - -

    Create phylogenies representing balanced and unbalanced trees (using the custom theme)

    - -
    1 library(ggphylo)
    -2 
    -3 b <- ggphylo(trees[which.min(colless_df$V1)], do.plot = F) + theme_myblank()
    -4 c <- ggphylo(trees[which.max(colless_df$V1)], do.plot = F) + theme_myblank()
    -5 
    -6 b
    - -

    center

    - -

    Now, put it all together in one plot using some gridExtra magic.

    - -
     1 library(gridExtra)
    - 2 
    - 3 grid.newpage()
    - 4 pushViewport(viewport(layout = grid.layout(1, 1)))
    - 5 vpa_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.49)
    - 6 vpb_ <- viewport(width = 0.35, height = 0.35, x = 0.23, y = 0.7)
    - 7 vpc_ <- viewport(width = 0.35, height = 0.35, x = 0.82, y = 0.7)
    - 8 print(a, vp = vpa_)
    - 9 print(b, vp = vpb_)
    -10 print(c, vp = vpc_)
    - -

    center

    - -

    And the same for Gamma stat, which measures the distribution of nodes in time.

    - -
     1 gamma_df <- ldply(trees, foo, metric="gamma") # calculate metric for each tree
    - 2 gam_percent_low <- round(length(gamma_df[gamma_df$V1 < -1, "V1"])/numtrees, 2)*100
    - 3 gam_percent_high <- round(length(gamma_df[gamma_df$V1 > 1, "V1"])/numtrees, 2)*100
    - 4 a <- ggplot(gamma_df, aes(V1)) +  # plot histogram of distribution of values
    - 5  geom_histogram() + 
    - 6  theme_bw(base_size=18) + 
    - 7  scale_x_continuous(breaks=c(-3,-2,-1,0,1,2,3)) + 
    - 8  geom_vline(xintercept = -1, colour="red", linetype = "longdash") +
    - 9  geom_vline(xintercept = 1, colour="red", linetype = "longdash") +
    -10  ggtitle(paste0("Distribution of Gamma metric for 1000 trees, cutoffs at -1 and 1 results in\n ", gam_percent_low, "% (", numtrees*(gam_percent_low/100), ") trees with deeper nodes (left) and ", gam_percent_high, "% (", numtrees*(gam_percent_high/100), ") trees with shallower nodes (right)")) +  
    -11  labs(x = "Gamma Metric Value", y = "Number of phylogenetic trees") +
    -12  theme(plot.title  = element_text(size = 16))
    -13 b <- ggphylo(trees[which.min(gamma_df$V1)], do.plot=F) + theme_myblank()
    -14 c <- ggphylo(trees[which.max(gamma_df$V1)], do.plot=F) + theme_myblank()
    -15 
    -16 grid.newpage()
    -17 pushViewport(viewport(layout = grid.layout(1,1)))
    -18 vpa_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.49)
    -19 vpb_ <- viewport(width = 0.35, height = 0.35, x = 0.23, y = 0.7)
    -20 vpc_ <- viewport(width = 0.35, height = 0.35, x = 0.82, y = 0.7)
    -21 print(a, vp = vpa_)
    -22 print(b, vp = vpb_)
    -23 print(c, vp = vpc_)
    - -

    center

    - -
    - -

    Get the .Rmd file used to create this post at my github account - or .md file.

    - -

    Written in Markdown, with help from knitr.

+6 -0.1161 +{% endhighlight %} + + + +{% highlight r linenos %} + +# Calculate the percent of trees that will fall into the cutoff for balanced and unbalanced trees +col_percent_low <- round(length(colless_df[colless_df$V1 < -0.7, "V1"])/numtrees, 2) * 100 +col_percent_high <- round(length(colless_df[colless_df$V1 > 0.7, "V1"])/numtrees, 2) * 100 +{% endhighlight %} + + +### Create a distribution of the metric values + +{% highlight r linenos %} +library(ggplot2) + +a <- ggplot(colless_df, aes(V1)) + # plot histogram of distribution of values + geom_histogram() + + theme_bw(base_size=18) + + scale_x_continuous(limits=c(-3,3), breaks=c(-3,-2,-1,0,1,2,3)) + + geom_vline(xintercept = -0.7, colour="red", linetype = "longdash") + + geom_vline(xintercept = 0.7, colour="red", linetype = "longdash") + + ggtitle(paste0("Distribution of Colless' metric for 1000 trees, cutoffs at -0.7 and 0.7 results in\n ", col_percent_low, "% (", numtrees*(col_percent_low/100), ") 'balanced' trees (left) and ", col_percent_high, "% (", numtrees*(col_percent_high/100), ") 'unbalanced' trees (right)")) + + labs(x = "Colless' Metric Value", y = "Number of phylogenetic trees") + + theme(plot.title = element_text(size = 16)) + +a +{% endhighlight %} + +![center](/public/img/collesshist.png) + + +### Create phylogenies representing balanced and unbalanced trees (using the custom theme) + +{% highlight r linenos %} +library(ggphylo) + +b <- ggphylo(trees[which.min(colless_df$V1)], do.plot = F) + theme_myblank() +c <- ggphylo(trees[which.max(colless_df$V1)], do.plot = F) + theme_myblank() + +b +{% endhighlight %} + +![center](/public/img/collessphylog.png) + + +### Now, put it all together in one plot using some gridExtra magic. + +{% highlight r linenos %} +library(gridExtra) + +grid.newpage() +pushViewport(viewport(layout = grid.layout(1, 1))) +vpa_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.49) +vpb_ <- viewport(width = 0.35, height = 0.35, x = 0.23, y = 0.7) +vpc_ <- viewport(width = 0.35, height = 0.35, x = 0.82, y = 0.7) +print(a, vp = vpa_) +print(b, vp = vpb_) +print(c, vp = vpc_) +{% endhighlight %} + +![center](/public/img/collessall.png) + + +### And the same for Gamma stat, which measures the distribution of nodes in time.
+ +{% highlight r linenos %} +gamma_df <- ldply(trees, foo, metric="gamma") # calculate metric for each tree +gam_percent_low <- round(length(gamma_df[gamma_df$V1 < -1, "V1"])/numtrees, 2)*100 +gam_percent_high <- round(length(gamma_df[gamma_df$V1 > 1, "V1"])/numtrees, 2)*100 +a <- ggplot(gamma_df, aes(V1)) + # plot histogram of distribution of values + geom_histogram() + + theme_bw(base_size=18) + + scale_x_continuous(breaks=c(-3,-2,-1,0,1,2,3)) + + geom_vline(xintercept = -1, colour="red", linetype = "longdash") + + geom_vline(xintercept = 1, colour="red", linetype = "longdash") + + ggtitle(paste0("Distribution of Gamma metric for 1000 trees, cutoffs at -1 and 1 results in\n ", gam_percent_low, "% (", numtrees*(gam_percent_low/100), ") trees with deeper nodes (left) and ", gam_percent_high, "% (", numtrees*(gam_percent_high/100), ") trees with shallower nodes (right)")) + + labs(x = "Gamma Metric Value", y = "Number of phylogenetic trees") + + theme(plot.title = element_text(size = 16)) +b <- ggphylo(trees[which.min(gamma_df$V1)], do.plot=F) + theme_myblank() +c <- ggphylo(trees[which.max(gamma_df$V1)], do.plot=F) + theme_myblank() + +grid.newpage() +pushViewport(viewport(layout = grid.layout(1,1))) +vpa_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.49) +vpb_ <- viewport(width = 0.35, height = 0.35, x = 0.23, y = 0.7) +vpc_ <- viewport(width = 0.35, height = 0.35, x = 0.82, y = 0.7) +print(a, vp = vpa_) +print(b, vp = vpb_) +print(c, vp = vpc_) +{% endhighlight %} + +![center](/public/img/gammaall.png) + + +********* +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-10-10-phylogenetic-tree-balance.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-10-10-phylogenetic-tree-balance.md). + +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).
    @@ -208,34 +373,50 @@

    -

    UPDATE: In response to Jarrett's query I laid out a separate use case in which you may want to query by higher taxonomic rankings than species. See below. In addition, added examples of querying by location in reply to comments by seminym.

+ #### UPDATE: In response to Jarrett's query I laid out a separate use case in which you may want to query by higher taxonomic rankings than species. See below. In addition, added examples of querying by location in reply to comments by seminym. + +***** + +We have been working on an R package to get GBIF data from R, with the stable version available through CRAN [here](URL), and the development version available on GitHub [here](http://github.com/rgbif). + +We had a Google Summer of Code student work on the package this summer - you can see his work on the package over at his GitHub page [here](). We have added some new functionality since his work, and would like to show it off. + +### Let's install rgbif first. + +{% highlight r linenos %} +# install_github('rgbif', 'ropensci') # uncomment if not already installed +library(rgbif) +library(plyr) +library(XML) +library(httr) +library(maps) +library(ggplot2) +{% endhighlight %} + + +### Get taxonomic information on a specific taxon or taxa in GBIF by their taxon concept keys.
    +{% highlight r linenos %} +(keys <- taxonsearch(scientificname = "Puma concolor")) # many matches to this search +{% endhighlight %} -

    We have been working on an R package to get GBIF data from R, with the stable version available through CRAN here, and the development version available on GitHub here.

    -

    We had a Google Summer of code stuent work on the package this summer - you can see his work on the package over at his GitHub page here. We have added some new functionality since his work, and would like to show it off.

    -

    Lets install rgbif first.

    +{% highlight text %} + [1] "51780668" "51758018" "50010499" "51773067" "51078815" "51798065" + [7] "51088007" "50410780" "50305290" "51791438" +{% endhighlight %} -
    1 # install_github('rgbif', 'ropensci') # uncomment if not already installed
    -2 library(rgbif)
    -3 library(plyr)
    -4 library(XML)
    -5 library(httr)
    -6 library(maps)
    -7 library(ggplot2)
    -

    Get taxonomic information on a specific taxon or taxa in GBIF by their taxon concept keys.

    -
    1 (keys <- taxonsearch(scientificname = "Puma concolor"))  # many matches to this search
    +{% highlight r linenos %} +taxonget(keys[[1]]) # let's get the first one - the first row in the data.frame is the one we searched for (51780668) +{% endhighlight %} -
     [1] "51780668" "51758018" "50010499" "51773067" "51078815" "51798065"
    - [7] "51088007" "50410780" "50305290" "51791438"
    -
    1 taxonget(keys[[1]])  # let's get the first one - the first row in the data.frame is the one we searched for (51780668)
    -
    [[1]]
    +{% highlight text %}
    +[[1]]
                         sciname taxonconceptkeys       rank
     1             Puma concolor         51780668    species
     2                      Puma         51780667      genus
    @@ -245,59 +426,91 @@ 

    Get taxonomic information on a specific taxon or taxa in GBIF by their taxon 6 Chordata 51775774 phylum 7 Animalia 51775773 kingdom 8 Puma concolor californica 51780669 subspecies -9 Puma concolor improcera 51780670 subspecies

    +9 Puma concolor improcera 51780670 subspecies -

    The occurrencedensity function was renamed to densitylist because it is in the density API service, not the occurrence API service. You can use densitylist to get a data.frame of total occurrence counts by one-degree cell for a single taxon, country, dataset, data publisher or data network. Just a quick reminder of what the function can do:

    +{% endhighlight %} -
    1 head(densitylist(originisocountrycode = "CA"))
    -
      cellid minLatitude maxLatitude minLongitude maxLongitude count
    +### The `occurrencedensity` function was renamed to `densitylist` because it is in the `density` API service, not the `occurrence` API service.  You can use `densitylist` to get a data.frame of total occurrence counts by one-degree cell for a single taxon, country, dataset, data publisher or data network.  Just a quick reminder of what the function can do:
    +
    +{% highlight r linenos %}
    +head(densitylist(originisocountrycode = "CA"))
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +  cellid minLatitude maxLatitude minLongitude maxLongitude count
     1  46913          40          41          -67          -66    44
     2  46914          40          41          -66          -65   907
     3  46915          40          41          -65          -64   510
     4  46916          40          41          -64          -63   645
     5  46917          40          41          -63          -62    56
    -6  46918          40          41          -62          -61   143
    - -

    Using a related function, density_spplist, you can get a species list by one-degree cell as well.

    - -
    1 # Get a species list by cell, choosing one at random
    -2 density_spplist(originisocountrycode = "CO", spplist = "random")[1:10]
    - -
     [1] "Abarema laeta (Benth.) Barneby & J.W.Grimes"
    - [2] "Abuta grandifolia (Mart.) Sandwith"         
    - [3] "Acalypha cuneata Poepp."                    
    - [4] "Acalypha diversifolia Jacq."                
    - [5] "Acalypha macrostachya Jacq."                
    - [6] "Acalypha stachyura Pax"                     
    - [7] "Acanthoscelio acutus"                       
    - [8] "Accipiter collaris"                         
    - [9] "Actitis macularia"                          
    -[10] "Adelobotrys klugii Wurdack"                 
    - -
    1 # density_spplist(originisocountrycode = 'CO', spplist = 'r') # can
    -2 # abbreviate the `spplist` argument
    -3 
    -4 # Get a species list by cell, choosing the one with the greatest no. of
    -5 # records
    -6 density_spplist(originisocountrycode = "CO", spplist = "great")[1:10]  # great is abbreviated from `greatest`
    - -
     [1] "Acanthaceae Juss."                
    - [2] "Accipitridae sp."                 
    - [3] "Accipitriformes/Falconiformes sp."
    - [4] "Apodidae sp."                     
    - [5] "Apodidae sp. (large swift sp.)"   
    - [6] "Apodidae sp. (small swift sp.)"   
    - [7] "Arctiinae"                        
    - [8] "Asteraceae Bercht. & J. Presl"    
    - [9] "Asteraceae sp. 1"                 
    -[10] "Asteraceae sp. 6"                 
    - -
    1 # Can also get a data.frame with counts instead of the species list
    -2 density_spplist(originisocountrycode = "CO", spplist = "great", listcount = "counts")[1:10, 
    -3     ]
    - -
                                  names_ count
    +6  46918          40          41          -62          -61   143
    +{% endhighlight %}
    +
    +
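Since `densitylist` returns a plain data.frame of one-degree cells, a quick summary is easy; this is a rough sketch (not from the original post), assuming the columns shown in the output above:

{% highlight r linenos %}
# Rough sketch: summarize the full densitylist() result shown above,
# using the count column it returns for each one-degree cell.
dens <- densitylist(originisocountrycode = "CA")
sum(dens$count)                  # total occurrence records across all cells
dens[which.max(dens$count), ]    # the single densest one-degree cell
{% endhighlight %}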
    +### Using a related function, `density_spplist`, you can get a species list by one-degree cell as well.
    +
    +{% highlight r linenos %}
    +# Get a species list by cell, choosing one at random
    +density_spplist(originisocountrycode = "CO", spplist = "random")[1:10]
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    + [1] "Abarema laeta (Benth.) Barneby & J.W.Grimes"
    + [2] "Abuta grandifolia (Mart.) Sandwith"         
    + [3] "Acalypha cuneata Poepp."                    
    + [4] "Acalypha diversifolia Jacq."                
    + [5] "Acalypha macrostachya Jacq."                
    + [6] "Acalypha stachyura Pax"                     
    + [7] "Acanthoscelio acutus"                       
    + [8] "Accipiter collaris"                         
    + [9] "Actitis macularia"                          
    +[10] "Adelobotrys klugii Wurdack"                 
    +{% endhighlight %}
    +
    +
    +
    +{% highlight r linenos %}
    +# density_spplist(originisocountrycode = 'CO', spplist = 'r') # can
    +# abbreviate the `spplist` argument
    +
    +# Get a species list by cell, choosing the one with the greatest no. of
    +# records
    +density_spplist(originisocountrycode = "CO", spplist = "great")[1:10]  # great is abbreviated from `greatest`
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    + [1] "Acanthaceae Juss."                
    + [2] "Accipitridae sp."                 
    + [3] "Accipitriformes/Falconiformes sp."
    + [4] "Apodidae sp."                     
    + [5] "Apodidae sp. (large swift sp.)"   
    + [6] "Apodidae sp. (small swift sp.)"   
    + [7] "Arctiinae"                        
    + [8] "Asteraceae Bercht. & J. Presl"    
    + [9] "Asteraceae sp. 1"                 
    +[10] "Asteraceae sp. 6"                 
    +{% endhighlight %}
    +
    +
    +
    +{% highlight r linenos %}
    +
    +# Can also get a data.frame with counts instead of the species list
    +density_spplist(originisocountrycode = "CO", spplist = "great", listcount = "counts")[1:10, 
    +    ]
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +                              names_ count
     1                  Acanthaceae Juss.     2
     2                   Accipitridae sp.     6
     3  Accipitriformes/Falconiformes sp.     2
    @@ -305,129 +518,118 @@ 

    Using a related function, density_spplist, you can get a specie 5 Apodidae sp. (large swift sp.) 8 6 Apodidae sp. (small swift sp.) 5 7 Arctiinae 7 -8 Asteraceae Bercht. & J. Presl 2 +8 Asteraceae Bercht. & J. Presl 2 9 Asteraceae sp. 1 6 -10 Asteraceae sp. 6 10

    - -

    You can now map point results, from fxns occurrencelist and those from densitylist, which plots them as points or as tiles, respectively. Point map, using output from occurrencelist.

    - -
    1 out <- occurrencelist(scientificname = "Puma concolor", coordinatestatus = TRUE, 
    -2     maxresults = 100, latlongdf = T)
    -3 gbifmap(input = out)  # make a map, plotting on world map
    - -

    center

    +10 Asteraceae sp. 6 10 +{% endhighlight %} -

    Point map, using output from occurrencelist, with many species plotted as different colors

    -
    1 splist <- c("Accipiter erythronemius", "Junco hyemalis", "Aix sponsa", "Buteo regalis")
    -2 out <- lapply(splist, function(x) occurrencelist(x, coordinatestatus = T, maxresults = 100, 
    -3     latlongdf = T))
    -4 gbifmap(out)
    +### You can now map point results, from fxns `occurrencelist` and those from `densitylist`, which plots them as points or as tiles, respectively. Point map, using output from occurrencelist. -

    center

    +{% highlight r linenos %} +out <- occurrencelist(scientificname = "Puma concolor", coordinatestatus = TRUE, + maxresults = 100, latlongdf = T) +gbifmap(input = out) # make a map, plotting on world map +{% endhighlight %} -

    Tile map, using output from densitylist, using results in Canada only.

    +![center](/public/img/gbifmap1.png) -
    1 out2 <- densitylist(originisocountrycode = "CA")  # data for Canada
    -2 gbifmap(out2)  # on world map
    -

    center

    +### Point map, using output from occurrencelist, with many species plotted as different colors -
    1 gbifmap(out2, region = "Canada")  # on Canada map
    +{% highlight r linenos %} +splist <- c("Accipiter erythronemius", "Junco hyemalis", "Aix sponsa", "Buteo regalis") +out <- lapply(splist, function(x) occurrencelist(x, coordinatestatus = T, maxresults = 100, + latlongdf = T)) +gbifmap(out) +{% endhighlight %} -

    NA

    +![center](/public/img/gbifmap2.png) -
    -

    We can also query by higher taxonomic rankings, and map all lower species within that ranking. Above we queried by scientificname, but we can also query by higher taxonomy. 7071443 is the taxonconceptkey for 'Bacillariophyceae', a Class which includes many lower species.

    +### Tile map, using output from densitylist, using results in Canada only. -
    1 out <- densitylist(taxonconceptKey = 7071443)
    -2 gbifmap(out)
    +{% highlight r linenos %} +out2 <- densitylist(originisocountrycode = "CA") # data for Canada +gbifmap(out2) # on world map +{% endhighlight %} -

    center

    +![center](/public/img/gbifmap31.png) -

    seminym asked about querying by area. You can query by area, though slightly differently for occurrencelist and densitylist functions. For occurrencelist you can search using min and max lat and long values (and min an max altitude, pretty cool, eh).

    +{% highlight r linenos %} +gbifmap(out2, region = "Canada") # on Canada map +{% endhighlight %} -
    1 # Get occurrences or density by area, using min/max lat/long coordinates
    -2 out <- occurrencelist(minlatitude = 30, maxlatitude = 35, minlongitude = -100, 
    -3     maxlongitude = -95, coordinatestatus = T, maxresults = 5000, latlongdf = T)
    -4 
    -5 # Using `geom_point`
    -6 gbifmap(out, "state", "texas", geom_point)
    +![NA](/public/img/gbifmap32.png) -

    center

    -
    1 # Using geom_jitter to move the points apart from one another
    -2 gbifmap(out, "state", "texas", geom_jitter, position_jitter(width = 0.3, height = 0.3))
    +***** -

    NA

    +### We can also query by higher taxonomic rankings, and map all lower species within that ranking. Above we queried by scientificname, but we can also query by higher taxonomy. 7071443 is the taxonconceptkey for 'Bacillariophyceae', a Class which includes many lower species. -
    1 # And move points a lot
    -2 gbifmap(out, "state", "texas", geom_jitter, position_jitter(width = 1, height = 1))
    +{% highlight r linenos %} +out <- densitylist(taxonconceptKey = 7071443) +gbifmap(out) +{% endhighlight %} -

    NA

    +![center](/public/img/algae.png) -

    And you can query by area in densitylist by specifying a place using the originisocountrycode argument (as done in an above example). Just showing the head of the data.frame here.

    -
    1 # Get density by place, note that you can't use the lat and long arguments
    -2 # in densitylist
    -3 head(densitylist(originisocountrycode = "CA"))
    +### seminym asked about querying by area. You can query by area, though slightly differently for occurrencelist and densitylist functions. For occurrencelist you can search using min and max lat and long values (and min an max altitude, pretty cool, eh). -
      cellid minLatitude maxLatitude minLongitude maxLongitude count
    -1  46913          40          41          -67          -66    44
    -2  46914          40          41          -66          -65   907
    -3  46915          40          41          -65          -64   510
    -4  46916          40          41          -64          -63   645
    -5  46917          40          41          -63          -62    56
    -6  46918          40          41          -62          -61   143
    +{% highlight r linenos %} +# Get occurrences or density by area, using min/max lat/long coordinates +out <- occurrencelist(minlatitude = 30, maxlatitude = 35, minlongitude = -100, + maxlongitude = -95, coordinatestatus = T, maxresults = 5000, latlongdf = T) -
    +# Using `geom_point` +gbifmap(out, "state", "texas", geom_point) +{% endhighlight %} -

    Get the .Rmd file used to create this post at my github account - or .md file.

    - -

    Written in Markdown, with help from knitr.

    - -
    - -
    -

    - - Vertnet - getting vertebrate museum record data and a quick map - -

    +![center](/public/img/byarea_occurr1.png) - +{% highlight r linenos %} -

    We (rOpenSci) started a repo to wrap the API for VertNet, an open access online database of vertebrate specimen records across many collection holders. Find the open source code here - please contribute if you are so inclined. We had a great Google Summer of Code student, Vijay Barve contributing to the repo this summer, so it is getting close to being CRAN-able.

    +# Using geom_jitter to move the points apart from one another +gbifmap(out, "state", "texas", geom_jitter, position_jitter(width = 0.3, height = 0.3)) +{% endhighlight %} -

    Most of the functions in the repo get you the raw data, but there were no functions to visualize the data. Since much of the data records of latitude and longitude data, maps are a natural visualization to use.

    +![NA](/public/img/byarea_occurr2.png) -

    What follows is a quick example of using the basic vertmap function.

    +{% highlight r linenos %} -

    First, let's install rvertnet

    +# And move points a lot +gbifmap(out, "state", "texas", geom_jitter, position_jitter(width = 1, height = 1)) +{% endhighlight %} -
    1 # install_github('rvertnet', 'ropensci') # uncomment if not installed
    -2 # already
    -3 library(rvertnet)
    +![NA](/public/img/byarea_occurr3.png) -

    First, let's get some data using vertoccurrence

    -
    1 out <- vertoccurrence(q = "larva", num = 100)  # get records on keyword 'larva', limit to 100
    -2 nrow(out)  # how many rows?
    +### And you can query by area in `densitylist` by specifying a place using the `originisocountrycode` argument (as done in an above example). Just showing the head of the data.frame here. -
    [1] 100
    +{% highlight r linenos %} +# Get density by place, note that you can't use the lat and long arguments +# in densitylist +head(densitylist(originisocountrycode = "CA")) +{% endhighlight %} -

    Now map it using vertmap. This is a very basic function: it simply cleans up the input data.frame, removing rows without lat/long data, and providing warnings when the input data.frame is not in the correct format. vertmap uses the ggplot2 framework for the map. If you want to make you own map please do so - this is just a simple fxn to get you started if you want to take a quick look at the data.

    -
    1 vertmap(input = out)  # make a map using vertmap
    -

    center

    +{% highlight text %} + cellid minLatitude maxLatitude minLongitude maxLongitude count +1 46913 40 41 -67 -66 44 +2 46914 40 41 -66 -65 907 +3 46915 40 41 -65 -64 510 +4 46916 40 41 -64 -63 645 +5 46917 40 41 -63 -62 56 +6 46918 40 41 -62 -61 143 +{% endhighlight %} -
    -

    Get the .Rmd file used to create this post at my github account - or .md file.

    +********* +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-10-08-rgbif-newfxns.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-10-08-rgbif-newfxns.md). -

    Written in Markdown, with help from knitr, and nice knitr highlighting/etc. in in RStudio.

    +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).
    diff --git a/_site/page28/index.html b/_site/page28/index.html index 5ffc92eae0..43042ef33a 100644 --- a/_site/page28/index.html +++ b/_site/page28/index.html @@ -61,102 +61,140 @@

    Recology

    - - Getting data from figures in published papers + + Vertnet - getting vertebrate museum record data and a quick map

- + + + We ([rOpenSci](http://ropensci.org/)) started a repo to wrap the API for [VertNet](http://vertnet.org/index.php), an open access online database of vertebrate specimen records across many collection holders. Find the open source code [here](https://github.com/ropensci/rvertnet) - please contribute if you are so inclined. We had a great Google Summer of Code student, [Vijay Barve](http://vijaybarve.wordpress.com/) contributing to the repo this summer, so it is getting close to being CRAN-able. + +Most of the functions in the repo get you the raw data, but there were no functions to visualize the data. Since much of the data includes latitude and longitude records, maps are a natural visualization to use. -

    The problem:

    +What follows is a quick example of using the basic `vertmap` function. -

    There are a lot of figures in published papers in the scholarly literature, like the below, from (Attwood et. al. 2012)):

    +### First, let's install `rvertnet` -

    alt text

    +{% highlight r linenos %} +# install_github('rvertnet', 'ropensci') # uncomment if not installed +# already +library(rvertnet) +{% endhighlight %} -

    At some point, a scientist wants to ask a question for which they can synthesize the knowledge on that question by collecting data from the published literature. This often requires something like the following workflow:

    -
      -
    1. Search for relevant papers (e.g., via Google Scholar).
    2. -
    3. Collect the papers.
    4. -
    5. Decide which are appropriate for inclusion.
    6. -
    7. Collect data from the figures using software on a native application. Examples include GraphClick and ImageJ.
    8. -
    9. Proof data.
    10. -
    11. Analyze data & publish paper.
    12. -
+### Next, let's get some data using `vertoccurrence` -

    This workflow needs revamping, particularly in step number 3 - collecting the data. This data remains private, moving from one closed source (original publication) to another (personal computer). We can surely do better.

    +{% highlight r linenos %} +out <- vertoccurrence(q = "larva", num = 100) # get records on keyword 'larva', limit to 100 +nrow(out) # how many rows? +{% endhighlight %} -

    A solution

    -

    The data collection process (Step 3 above) can make use of modern technology, and be based in the browser. Some benefits of a browser based data collection approach:

    -
      -
    • Cross-platform: a data digitization program that lives in the browser can be more easily cross-platform (Linux/Mac/Windows) than a native app.
    • -
    • Linked data: with the increasing abundance of APIs (application programming interfaces), we can link the data going into this app to anything of interest. This is not so easy in a native app.
    • -
    • Automatic updates: a web based browser can be updated easily without requiring a user to go get updates.
    • -
    • User-based: a web based browser can easily integrate secure user login so that users can be associated with data collected, allowing for quantification of user-based error, and eventually user based scores/badges/etc. if so desired.
    • -
    +{% highlight text %} +[1] 100 +{% endhighlight %} -

    For those concerned about a browser based approach to data collection from figures, it will likely be possible to make it work offline as well, then send data up to servers when connected to the web again.

    -

    What would be great about having data be public by default is that the work would be reproducible easily, at least on the data side of things. Hopefully the researchers would make all their code available publicly to recreate their analyses.

+### Now map it using `vertmap`. This is a very basic function: it simply cleans up the input data.frame, removing rows without lat/long data, and providing warnings when the input data.frame is not in the correct format. `vertmap` uses the `ggplot2` framework for the map. If you want to make your own map please do so - this is just a simple fxn to get you started if you want to take a quick look at the data. -

    Question: Why would this idea work?

    +{% highlight r linenos %} +vertmap(input = out) # make a map using vertmap +{% endhighlight %} -

    Better question: Why wouldn’t it work!

+![center](/public/img/vertmap.png) + + +********* +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-09-19-rvertnet.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-09-19-rvertnet.md). + +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and nice knitr highlighting/etc. in [RStudio](http://rstudio.org/). + +
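If you would rather roll your own map than use `vertmap` above, a rough sketch with `ggplot2` and `maps` might look like the following; the `Latitude`/`Longitude` column names are an assumption, so check `names(out)` for what `vertoccurrence` actually returns:

{% highlight r linenos %}
# Rough sketch only: a hand-rolled point map from vertoccurrence() output.
# Column names Latitude/Longitude are assumed - adjust to the real names.
library(ggplot2)
library(maps)

out <- vertoccurrence(q = "larva", num = 100)
dat <- out[!is.na(out$Latitude) & !is.na(out$Longitude), ]   # drop rows without coordinates
dat$Latitude <- as.numeric(dat$Latitude)
dat$Longitude <- as.numeric(dat$Longitude)

world <- map_data("world")   # world polygons from the maps package
ggplot() +
  geom_polygon(data = world, aes(long, lat, group = group),
               fill = "grey90", colour = "grey60") +
  geom_point(data = dat, aes(Longitude, Latitude), colour = "darkblue", size = 1.5) +
  theme_bw()
{% endhighlight %}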
    + +
    +

    + + Getting data from figures in published papers + +

    + + -

    I think this idea could be awesome. The reason I think it could be is based on two observations:

+ ## The problem: +There are a lot of figures in published papers in the scholarly literature, like the below, from (Attwood _et al._ 2012): -
      -
    1. There is a seemingly endless supply of academic papers with figures in them from which data can be extracted.**
    2. -
    3. There is increasing use of meta-analysis in science, which is fed by just this kind of data.
    4. -
    +![alt text](/public/img/getfig2.png) -

    ** p.s. in the future, perhaps we will move to all SVG format figures or something even better, in which case data can be extracted from the underlying XML

    +At some point, a scientist wants to ask a question for which they can synthesize the knowledge on that question by collecting data from the published literature. This often requires something like the following workflow: -

    Okay, maybe it's a good idea, but who owns the data in figures in published papers?

    +1. Search for relevant papers (e.g., via Google Scholar). +2. Collect the papers. +3. Decide which are appropriate for inclusion. +4. Collect data from the figures using software on a native application. Examples include [GraphClick](http://www.arizona-software.ch/graphclick/) and [ImageJ](http://rsbweb.nih.gov/ij/). +5. Proof data. +6. Analyze data & publish paper. -

    As far as I know, and I've checked with a few knowledgeable people, no one owns this data. So it's ripe for the digitizing!

    +This workflow needs revamping, particularly in step number 3 - collecting the data. This data remains private, moving from one closed source (original publication) to another (personal computer). We can surely do better. -

    Open access

    +## A solution +The data collection process (Step 3 above) can make use of modern technology, and be based in the browser. Some benefits of a browser based data collection approach: -

    I want this project to be totally open access (and I hope you do too). I love models like GitHub where everything is public by default (unless you are an enterprise user, exceptions, exceptions), and I think that is what this requires. You may be thinking though: "But I am collecting data for my meta-analysis and I don't want to share the data with anyone else". My answer: "I understand where you are coming from, but it doesn't seem very likely that someone will be asking the exact same question as you and be looking for the data from the exact same papers". There will just be a huge database of data from figures, and all the appropriate metadata of course. Anyone should be able to use this.

++ Cross-platform: a data digitization program that lives in the browser can be more easily cross-platform (Linux/Mac/Windows) than a native app. ++ Linked data: with the increasing abundance of APIs (application programming interfaces), we can link the data going into this app to anything of interest. This is not so easy in a native app. ++ Automatic updates: a web based app can be updated easily without requiring a user to go get updates. ++ User-based: a web based app can easily integrate secure user login so that users can be associated with data collected, allowing for quantification of user-based error, and eventually user based scores/badges/etc. if so desired. -

    APIs

    +For those concerned about a browser based approach to data collection from figures, it will likely be possible to make it work offline as well, then send data up to servers when connected to the web again. -

    It would be great to build this from the start having an API in mind. That is, how do we need to structure the data to be easily served up in an API to other websites, or pulled down to someone's local machine within Python or R to do data manipulation, analysis, and visualization? We are going to need a key-value store database, such as MongoDB/CouchDB because ideally at least we would store the data collected, the figure itself, use information, etc.

    +What would be great about having data be public by default is that the work would be reproducible easily, at least on the data side of things. Hopefully the researchers would make all their code available publicly to recreate their analyses. -

    What is being done about this?

    +## Question: Why would this idea work? +Better question: Why wouldn’t it work! -

    I was fortunate enough to tag along with Ted Hart, a postdoc at UBC, on a recently submitted NCEAS working group proposal. Who knows if we'll get it, but we are already working on a prototype, so we will hit the ground running if we get funded, and just hit the ground, but walk a bit slower if we don't get the funding.

    +I think this idea could be awesome. The reason I think it could be is based on two observations: -

    What could this be in the future?

    +1. There is a seemingly endless supply of academic papers with figures in them from which data can be extracted.** +2. There is increasing use of meta-analysis in science, which is fed by just this kind of data. -

    At least in my mind, I think of this idea going the direction of gamification, including points, badges, etc., sort of like FoldIt or GalaxyZoo. At first we need alpha-, then beta-testers, which I imagine will most likely be academics exracting data for a meta-analysis for example. But in the future, it would be great to make the interface enjoyable enough to attract non-academics, which could greatly increase the speed of data collection.

    +** p.s. in the future, perhaps we will move to all SVG format figures or something even better, in which case data can be extracted from the underlying XML -

    Once there are a lot of people collecting data we can get many data points for every single data point in a graph. Whereas right now, someone clicks on each data point in a graph one, maybe two times if they are lucky. In the future, we could have ten different users clicking on each mean and each error bar in each graph. So exciting! The following figure illustrates this.

    +## Okay, maybe it's a good idea, but who owns the data in figures in published papers? +As far as I know, and I've checked with a few knowledgeable people, no one owns this data. So it's ripe for the digitizing! -

    center

    +## Open access +I want this project to be totally open access (and I hope you do too). I love models like GitHub where everything is public by default (unless you are an enterprise user, exceptions, exceptions), and I think that is what this requires. You may be thinking though: "But I am collecting data for my meta-analysis and I don't want to share the data with anyone else". My answer: "I understand where you are coming from, but it doesn't seem very likely that someone will be asking the exact same question as you and be looking for the data from the exact same papers". There will just be a huge database of data from figures, and all the appropriate metadata of course. Anyone should be able to use this. -

    What do you think?

    +## APIs +It would be great to build this from the start having an API in mind. That is, how do we need to structure the data to be easily served up in an API to other websites, or pulled down to someone's local machine within Python or R to do data manipulation, analysis, and visualization? We are going to need a key-value store database, such as MongoDB/CouchDB because ideally at least we would store the data collected, the figure itself, use information, etc. -

    Is this idea totally insane? Is it do-able? Is it worth doing?

    +## What is being done about this? +I was fortunate enough to tag along with [Ted Hart](http://emhart.github.com/), a postdoc at [UBC](PUTINLINKHERE), on a recently submitted NCEAS working group proposal. Who knows if we'll get it, but we are already working on a prototype, so we will hit the ground running if we get funded, and just hit the ground, but walk a bit slower if we don't get the funding. -
+## What could this be in the future? +At least in my mind, I think of this idea going the direction of gamification, including points, badges, etc., sort of like [FoldIt](http://fold.it/portal/) or [GalaxyZoo](http://www.galaxyzoo.org/). At first we need alpha-, then beta-testers, which I imagine will most likely be academics extracting data for a meta-analysis for example. But in the future, it would be great to make the interface enjoyable enough to attract non-academics, which could greatly increase the speed of data collection.

    Get the .Rmd file used to create this post at my github account - or .md file.

    +Once there are a lot of people collecting data we can get many data points for every single data point in a graph. Whereas right now, someone clicks on each data point in a graph one, maybe two times if they are lucky. In the future, we could have ten different users clicking on each mean and each error bar in each graph. So exciting! The following figure illustrates this. -

    Written in Markdown, with help from knitr, and nice knitr highlighting/etc. in in RStudio.

    +![center](/public/img/clicks.png) -
    -

    References

    +## What do you think? +Is this idea totally insane? Is it do-able? Is it worth doing? -

Attwood AS, Scott-Samuel NE, Stothart G, Munafò MR and Laks J (2012). +********* +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-09-18-getting-data.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-09-18-getting-data.md). + +#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and nice knitr highlighting/etc. in [RStudio](http://rstudio.org/). + +********* +#### References +

    Attwood AS, Scott-Samuel NE, Stothart G, Munafò MR and Laks J (2012). “Glass Shape Influences Consumption Rate For Alcoholic Beverages.” Plos One, 7. -http://dx.doi.org/10.1371/journal.pone.0043007.

    +http://dx.doi.org/10.1371/journal.pone.0043007. +
    @@ -169,46 +207,61 @@

    -

    Metadata! Metadata is very cool. It's super hot right now - everybody is talking about it. Okay, maybe not everyone, but it's an important part of archiving scholarly work.

    + Metadata! Metadata is very cool. It's super hot right now - everybody is talking about it. Okay, maybe not everyone, but it's an important part of archiving scholarly work. + +We are working on [a repo on GitHub `rmetadata`](https://github.com/ropensci/rmetadata) to be a one stop shop for querying metadata from around the web. Various repos on GitHub we have started - [rpmc](https://github.com/ropensci/rpmc), [rdatacite](https://github.com/ropensci/rpmc), [rdryad](https://github.com/ropensci/rpmc), [rpensoft](https://github.com/ropensci/rpmc), [rhindawi](https://github.com/ropensci/rpmc) - will at least in part be folded into `rmetadata`. + +As a start we are writing functions to hit any metadata services that use the [OAI-PMH: "Open Archives Initiative Protocol for Metadata Harvesting"](http://www.openarchives.org/OAI/openarchivesprotocol.html) framework. `OAI-PMH` has six methods (or verbs as they are called) for data harvesting that are the same across different metadata providers: -

    We are working on a repo on GitHub rmetadata to be a one stop shop for querying metadata from around the web. Various repos on GitHub we have started - rpmc, rdatacite, rdryad, rpensoft, rhindawi - will at least in part be folded into rmetadata.

    ++ `GetRecord` ++ `Identify` ++ `ListIdentifiers` ++ `ListMetadataFormats` ++ `ListRecords` ++ `ListSets` -

    As a start we are writing functions to hit any metadata services that use the OAI-PMH: "Open Archives Initiative Protocol for Metadata Harvesting" framework. OAI-PMH has six methods (or verbs as they are called) for data harvesting that are the same across different metadata providers:

    +`OAI-PMH` provides an updating list of data providers, which we can easily use to get the base URLs for their data. Then we just use one of the six above methods to query their metadata. -
      -
    • GetRecord
    • -
    • Identify
    • -
    • ListIdentifiers
    • -
    • ListMetadataFormats
    • -
    • ListRecords
    • -
    • ListSets
    • -
    +### Let's install rmetadata first. -

    OAI-PMH provides an updating list of data providers, which we can easily use to get the base URLs for their data. Then we just use one of the six above methods to query their metadata.

    +{% highlight r linenos %} +install_github("rmetadata", "ropensci") +library(rmetadata) +{% endhighlight %} -

    Let's install rmetadata first.

    -
    1 install_github("rmetadata", "ropensci")
    -2 library(rmetadata)
    +### The most basic thing you can do with `OAI-PMH` is identify the data provider, getting their basic information. The `Identify` verb. -

    The most basic thing you can do with OAI-PMH is identify the data provider, getting their basic information. The Identify verb.

    +{% highlight r linenos %} +# one provider +md_identify(provider = "datacite") +{% endhighlight %} -
    1 # one provider
    -2 md_identify(provider = "datacite")
    -
    repositoryName                     baseURL protocolVersion
    +
    +{% highlight text %}
    +repositoryName                     baseURL protocolVersion
     1   DataCite MDS http://oai.datacite.org/oai             2.0
             adminEmail    earliestDatestamp deletedRecord
     1 admin@datacite.org 2011-01-01T00:00:00Z            no
              granularity compression compression.1
     1 YYYY-MM-DDThh:mm:ssZ        gzip       deflate
                                                                                                                                                         description
    -1 oai, oai.datacite.org, :, oai:oai.datacite.org:12425, http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd
    +1 oai, oai.datacite.org, :, oai:oai.datacite.org:12425, http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd +{% endhighlight %} + + -
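Under the hood, an OAI-PMH verb is just a query parameter on the provider's base URL; here is a rough sketch of the raw request that a call like `md_identify` wraps (the package's actual internals may differ), using `RCurl` and `XML`:

{% highlight r linenos %}
# Rough sketch: the verb goes on the base URL as a query parameter,
# e.g. GET http://oai.datacite.org/oai?verb=Identify
library(RCurl)
library(XML)

base <- "http://oai.datacite.org/oai"      # DataCite base URL, as shown above
resp <- getForm(base, verb = "Identify")   # raw XML response as a string
doc <- xmlParse(resp, asText = TRUE)
xpathSApply(doc, "//*[local-name()='repositoryName']", xmlValue)
{% endhighlight %}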
    1 # many providers
    -2 md_identify(provider = c("datacite", "pensoft"))
    +{% highlight r linenos %} -
        repositoryName                     baseURL protocolVersion
    +# many providers
    +md_identify(provider = c("datacite", "pensoft"))
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +    repositoryName                     baseURL protocolVersion
     1       DataCite MDS http://oai.datacite.org/oai             2.0
     2 Pensoft Publishers       http://oai.pensoft.eu             2.0
             adminEmail    earliestDatestamp deletedRecord
    @@ -219,38 +272,64 @@ 

    The most basic thing you can do with OAI-PMH is identify the da 2 YYYY-MM-DD NULL NULL description 1 oai, oai.datacite.org, :, oai:oai.datacite.org:12425, http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd -2 NULL

    +2 NULL +{% endhighlight %} + + -
    1 # no match for one, two matches for other
    -2 md_identify(provider = c("harvard", "journal"))
    +{% highlight r linenos %} -
    $harvard
    +# no match for one, two matches for other
    +md_identify(provider = c("harvard", "journal"))
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +$harvard
                  x
     1 no match found
     
     $journal
                                                  repo_name
     1       Hrcak - Portal of scientific journals of Croatia
    -2 International journal of Power Electronics Engineering
    +2 International journal of Power Electronics Engineering + +{% endhighlight %} -
    1 # let's pick one from the second
    -2 md_identify(provider = "Hrcak")
    -
                                      repositoryName
    +
    +{% highlight r linenos %}
    +
    +# let's pick one from the second
    +md_identify(provider = "Hrcak")
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +                                  repositoryName
     1 Hrcak - Portal of scientific journals of Croatia
                       baseURL protocolVersion    adminEmail
     1 http://hrcak.srce.hr/oai/             2.0 hrcak@srce.hr
     earliestDatestamp deletedRecord granularity
     1        2005-12-01            no  YYYY-MM-DD
                                                                                                                                                                             description
    -1 oai, hrcak.srce.hr, :, oai:hrcak.srce.hr:anIdentifier, http://www.openarchives.org/OAI/2.0/oai-identifier                    http://www.openarchives.org/OAI/2.0/oai-identifier.xsd
    +1 oai, hrcak.srce.hr, :, oai:hrcak.srce.hr:anIdentifier, http://www.openarchives.org/OAI/2.0/oai-identifier http://www.openarchives.org/OAI/2.0/oai-identifier.xsd +{% endhighlight %} -

    There are a variety of metadata formats, depending on the data provider - list them with the ListMetadataFormats verb.

    -
    1 # List metadata formats for a provider
    -2 md_listmetadataformats(provider = "dryad")
    +### There are a variety of metadata formats, depending on the data provider - list them with the `ListMetadataFormats` verb. -
    metadataPrefix
    +{% highlight r linenos %}
    +# List metadata formats for a provider
    +md_listmetadataformats(provider = "dryad")
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +metadataPrefix
     1         oai_dc
     2            rdf
     3            ore
    @@ -264,12 +343,21 @@ 

    There are a variety of metadata formats, depending on the data provider - li 1 http://www.openarchives.org/OAI/2.0/oai_dc/ 2 http://www.openarchives.org/OAI/2.0/rdf/ 3 http://www.w3.org/2005/Atom -4 http://www.loc.gov/METS/

    +4 http://www.loc.gov/METS/ +{% endhighlight %} + + -
    1 # List metadata formats for a specific identifier for a provider
    -2 md_listmetadataformats(provider = "pensoft", identifier = "10.3897/zookeys.1.10")
    +{% highlight r linenos %} -
              identifier metadataPrefix
    +# List metadata formats for a specific identifier for a provider
    +md_listmetadataformats(provider = "pensoft", identifier = "10.3897/zookeys.1.10")
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +          identifier metadataPrefix
     1 10.3897/zookeys.1.10         oai_dc
     2 10.3897/zookeys.1.10           mods
                                                schema
    @@ -277,79 +365,127 @@ 

    There are a variety of metadata formats, depending on the data provider - li 2 http://www.loc.gov/standards/mods/v3/mods-3-1.xsd metadataNamespace 1 http://www.openarchives.org/OAI/2.0/oai_dc/ -2 http://www.loc.gov/mods/v3

    +2 http://www.loc.gov/mods/v3 +{% endhighlight %} + + +### The `ListRecords` verb is used to harvest records from a repository -

    The ListRecords verb is used to harvest records from a repository

    +{% highlight r linenos %} +head(md_listrecords(provider = "datacite")[[1]][, 2:4]) +{% endhighlight %} -
    1 head(md_listrecords(provider = "datacite")[[1]][, 2:4])
    -
                    identifier            datestamp setSpec
    +
    +{% highlight text %}
    +                identifier            datestamp setSpec
     1 oai:oai.datacite.org:32153 2011-06-08T08:57:11Z     TIB
     2 oai:oai.datacite.org:32200 2011-06-20T08:11:08Z     TIB
     3 oai:oai.datacite.org:32220 2011-06-28T14:11:08Z     TIB
     4 oai:oai.datacite.org:32241 2011-06-30T13:24:45Z     TIB
     5 oai:oai.datacite.org:32255 2011-07-01T12:09:24Z     TIB
    -6 oai:oai.datacite.org:32282 2011-07-05T09:08:10Z     TIB
    - -

    ListIdentifiers is an abbreviated form of ListRecords, retrieving only headers rather than records.

    - -
    1 # Single provider
    -2 md_listidentifiers(provider = "datacite", set = "REFQUALITY")[[1]][1:10]
    - -
    [1] "oai:oai.datacite.org:32426" "oai:oai.datacite.org:32152"
    -[3] "oai:oai.datacite.org:25453" "oai:oai.datacite.org:25452"
    -[5] "oai:oai.datacite.org:25451" "oai:oai.datacite.org:25450"
    -[7] "oai:oai.datacite.org:25449" "oai:oai.datacite.org:25407"
    -[9] "oai:oai.datacite.org:48328" "oai:oai.datacite.org:48439"
    - -
    1 md_listidentifiers(provider = "dryad", from = "2012-07-15")[[1]][1:10]
    - -
    [1] "oai:datadryad.org:10255/dryad.9106"
    -[2] "oai:datadryad.org:10255/dryad.33780"
    -[3] "oai:datadryad.org:10255/dryad.33901"
    -[4] "oai:datadryad.org:10255/dryad.33902"
    -[5] "oai:datadryad.org:10255/dryad.34472"
    -[6] "oai:datadryad.org:10255/dryad.34558"
    -[7] "oai:datadryad.org:10255/dryad.39975"
    -[8] "oai:datadryad.org:10255/dryad.35065"
    -[9] "oai:datadryad.org:10255/dryad.35081"
    -[10] "oai:datadryad.org:10255/dryad.35082"
    - -
    1 # Many providers
    -2 out <- md_listidentifiers(provider = c("datacite", "pensoft"), from = "2012-08-21")
    -3 llply(out, function(x) x[1:10])  # display just a few of them
    - -
    [[1]]
    -[1] "oai:oai.datacite.org:1099317" "oai:oai.datacite.org:1099572"
    -[3] "oai:oai.datacite.org:1099824" "oai:oai.datacite.org:1099695"
    -[5] "oai:oai.datacite.org:1088239" "oai:oai.datacite.org:1088122"
    -[7] "oai:oai.datacite.org:1088190" "oai:oai.datacite.org:1175749"
    -[9] "oai:oai.datacite.org:1175288" "oai:oai.datacite.org:1115603"
    +6 oai:oai.datacite.org:32282 2011-07-05T09:08:10Z     TIB
    +{% endhighlight %}
    +
    +
    +### `ListIdentifiers` is an abbreviated form of `ListRecords`, retrieving only headers rather than records.
    +
    +{% highlight r linenos %}
    +# Single provider
    +md_listidentifiers(provider = "datacite", set = "REFQUALITY")[[1]][1:10]
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +[1] "oai:oai.datacite.org:32426" "oai:oai.datacite.org:32152"
    +[3] "oai:oai.datacite.org:25453" "oai:oai.datacite.org:25452"
    +[5] "oai:oai.datacite.org:25451" "oai:oai.datacite.org:25450"
    +[7] "oai:oai.datacite.org:25449" "oai:oai.datacite.org:25407"
    +[9] "oai:oai.datacite.org:48328" "oai:oai.datacite.org:48439"
    +{% endhighlight %}
    +
    +
    +
    +{% highlight r linenos %}
    +md_listidentifiers(provider = "dryad", from = "2012-07-15")[[1]][1:10]
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +[1] "oai:datadryad.org:10255/dryad.9106"
    +[2] "oai:datadryad.org:10255/dryad.33780"
    +[3] "oai:datadryad.org:10255/dryad.33901"
    +[4] "oai:datadryad.org:10255/dryad.33902"
    +[5] "oai:datadryad.org:10255/dryad.34472"
    +[6] "oai:datadryad.org:10255/dryad.34558"
    +[7] "oai:datadryad.org:10255/dryad.39975"
    +[8] "oai:datadryad.org:10255/dryad.35065"
    +[9] "oai:datadryad.org:10255/dryad.35081"
    +[10] "oai:datadryad.org:10255/dryad.35082"
    +{% endhighlight %}
    +
    +
    +
    +{% highlight r linenos %}
    +
    +# Many providers
    +out <- md_listidentifiers(provider = c("datacite", "pensoft"), from = "2012-08-21")
    +llply(out, function(x) x[1:10])  # display just a few of them
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +[[1]]
    +[1] "oai:oai.datacite.org:1099317" "oai:oai.datacite.org:1099572"
    +[3] "oai:oai.datacite.org:1099824" "oai:oai.datacite.org:1099695"
    +[5] "oai:oai.datacite.org:1088239" "oai:oai.datacite.org:1088122"
    +[7] "oai:oai.datacite.org:1088190" "oai:oai.datacite.org:1175749"
    +[9] "oai:oai.datacite.org:1175288" "oai:oai.datacite.org:1115603"
     
     [[2]]
    -[1] "10.3897/phytokeys.16.2884" "10.3897/phytokeys.16.3602"
    -[3] "10.3897/phytokeys.16.3186" "10.3897/zookeys.216.3407"
    -[5] "10.3897/zookeys.216.3332"  "10.3897/zookeys.216.3224"
    -[7] "10.3897/zookeys.216.3769"  "10.3897/zookeys.216.3360"
    -[9] "10.3897/zookeys.216.3646"  "10.3897/neobiota.14.3140"
    +[1] "10.3897/phytokeys.16.2884" "10.3897/phytokeys.16.3602" +[3] "10.3897/phytokeys.16.3186" "10.3897/zookeys.216.3407" +[5] "10.3897/zookeys.216.3332" "10.3897/zookeys.216.3224" +[7] "10.3897/zookeys.216.3769" "10.3897/zookeys.216.3360" +[9] "10.3897/zookeys.216.3646" "10.3897/neobiota.14.3140" + +{% endhighlight %} + -

    With ListSets you can retrieve the set structure of a repository.

    +### With `ListSets` you can retrieve the set structure of a repository. -
    1 # arXiv, returns a data.frame
    -2 head(md_listsets(provider = "arXiv")[[1]])
    +{% highlight r linenos %} +# arXiv, returns a data.frame +head(md_listsets(provider = "arXiv")[[1]]) +{% endhighlight %} -
               setName          setSpec
    +
    +
    +{% highlight text %}
    +           setName          setSpec
     1   Computer Science               cs
     2        Mathematics             math
     3 Nonlinear Sciences             nlin
     4            Physics          physics
     5       Astrophysics physics:astro-ph
    -6   Condensed Matter physics:cond-mat
    +6 Condensed Matter physics:cond-mat +{% endhighlight %} + + + +{% highlight r linenos %} -
    1 # many providers, returns a list
    -2 md_listsets(provider = c("pensoft", "arXiv"))
    +# many providers, returns a list +md_listsets(provider = c("pensoft", "arXiv")) +{% endhighlight %} -
    [[1]]
    +
    +
    +{% highlight text %}
    +[[1]]
                                     setName            setSpec
     1                                 ZooKeys            zookeys
     2                                 BioRisk            biorisk
    @@ -382,14 +518,22 @@ 

    With ListSets you can retrieve the set structure of a repositor 16 Quantum Physics physics:quant-ph 17 Quantitative Biology q-bio 18 Quantitative Finance q-fin -19 Statistics stat

    +19 Statistics stat + +{% endhighlight %} + + +### Retrieve an individual metadata record from a repository using the `GetRecord` verb. -

    Retrieve an individual metadata record from a repository using the GetRecord verb.

    +{% highlight r linenos %} +# Single provider, one identifier +md_getrecord(provider = "pensoft", identifier = "10.3897/zookeys.1.10") +{% endhighlight %} -
    1 # Single provider, one identifier
    -2 md_getrecord(provider = "pensoft", identifier = "10.3897/zookeys.1.10")
    -
              identifier  datestamp
    +
    +{% highlight text %}
    +          identifier  datestamp
     1 10.3897/zookeys.1.10 2008-07-04
                                                                                                dc.title
     1 A new candidate for a Gondwanaland distribution in the Zodariidae (Araneae): Australutica in Africa
    @@ -406,12 +550,21 @@ 

    Retrieve an individual metadata record from a repository using the Get dc.identifier.1 dc.language 1 http://www.pensoft.net/journals/zookeys/article/10/ en dc..attrs -1 http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd

    +1 http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd +{% endhighlight %} + + + +{% highlight r linenos %} -
    1 # Single provider, multiple identifiers
    -2 md_getrecord(provider = "pensoft", identifier = c("10.3897/zookeys.1.10", "10.3897/zookeys.4.57"))
    +# Single provider, multiple identifiers +md_getrecord(provider = "pensoft", identifier = c("10.3897/zookeys.1.10", "10.3897/zookeys.4.57")) +{% endhighlight %} -
              identifier  datestamp
    +
    +
    +{% highlight text %}
    +          identifier  datestamp
     1 10.3897/zookeys.1.10 2008-07-04
     2 10.3897/zookeys.4.57 2008-12-17
                                                                                                   dc.title
    @@ -437,191 +590,16 @@ 

    Retrieve an individual metadata record from a repository using the Get 2 http://www.pensoft.net/journals/zookeys/article/57/ en dc..attrs 1 http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd -2 http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd

    - -

    Cool, so I hope people find this post and package useful. Let me know what you think in comments below, or if you have code specific comments or additions, go to the GitHub repo for rmetadata. In a upcoming post I will show an example of what you can do with rmetadata in terms of an actual research question.

    - -
    - -

    Get the .Rmd file used to create this post at my github account - or .md file.

    - -

    Written in Markdown, with help from knitr, and nice knitr highlighting/etc. in in RStudio.

    - -
    - -
    -

    - - Getting data on your government - -

    - - - -
    - -

    I created an R package a while back to interact with some APIs that serve up data on what our elected represenatives are up to, including the New York Times Congress API, and the Sunlight Labs API.

    - -

    What kinds of things can you do with govdat? Here are a few examples.

    - -
    - -

    How do the two major parties differ in the use of certain words (searches the congressional record using the Sunlight Labs Capitol Words API)?

    - -
     1 # install_github('govdat', 'sckott')
    - 2 library(govdat)
    - 3 library(reshape2)
    - 4 library(ggplot2)
    - 5 
    - 6 dems <- sll_cw_dates(phrase = "science", start_date = "1996-01-20", end_date = "2012-09-01", 
    - 7     granularity = "year", party = "D", printdf = TRUE)
    - 8 repubs <- sll_cw_dates(phrase = "science", start_date = "1996-01-20", end_date = "2012-09-01", 
    - 9     granularity = "year", party = "R", printdf = TRUE)
    -10 df <- melt(rbind(data.frame(party = rep("D", nrow(dems)), dems), data.frame(party = rep("R", 
    -11     nrow(repubs)), repubs)))
    -12 df$count <- as.numeric(df$count)
    -13 
    -14 ggplot(df, aes(yearmonth, count, colour = party, group = party)) + geom_line() + 
    -15     scale_colour_manual(values = c("blue", "red")) + labs(y = "use of the word 'Science'") + 
    -16     theme_bw(base_size = 18) + opts(axis.text.x = theme_text(size = 10), panel.grid.major = theme_blank(), 
    -17     panel.grid.minor = theme_blank(), legend.position = c(0.2, 0.8))
    - -

    center

    - -
    - -

    Let's get some data on donations to individual elected representatives.

    - -
    1 library(plyr)
    -2 
    -3 # Let's get Nancy Pelosi's entity ID
    -4 sll_ts_aggregatesearch("Nancy Pelosi")[[1]]
    - -
    $name
    -[1] "Nancy Pelosi (D)"
    -
    -$count_given
    -[1] 0
    -
    -$firm_income
    -[1] 0
    -
    -$count_lobbied
    -[1] 0
    -
    -$seat
    -[1] "federal:house"
    -
    -$total_received
    -[1] 13769274
    -
    -$state
    -[1] "CA"
    -
    -$lobbying_firm
    -NULL
    -
    -$count_received
    -[1] 9852
    -
    -$party
    -[1] "D"
    -
    -$total_given
    -[1] 0
    -
    -$type
    -[1] "politician"
    -
    -$id
    -[1] "85ab2e74589a414495d18cc7a9233981"
    -
    -$non_firm_spending
    -[1] 0
    -
    -$is_superpac
    -NULL
    - -
    1 # Her entity ID
    -2 sll_ts_aggregatesearch("Nancy Pelosi")[[1]]$id
    - -
    [1] "85ab2e74589a414495d18cc7a9233981"
    - -
    1 # And search for her top donors by sector
    -2 nancy <- ldply(sll_ts_aggregatetopsectors(sll_ts_aggregatesearch("Nancy Pelosi")[[1]]$id))
    -3 nancy  # but just abbreviations for sectors
    - -
       sector count     amount
    -1       F  1847 2698672.00
    -2       P   981 2243050.00
    -3       H   829 1412700.00
    -4       K  1345 1409836.00
    -5       Q  1223 1393154.00
    -6       N   829 1166187.00
    -7       B   537  932044.00
    -8       W   724  760800.00
    -9       Y   820  664926.00
    -10      E   201  283575.00
    - -
    1 data(sll_ts_sectors)  # load sectors abbrevations data
    -2 nancy2 <- merge(nancy, sll_ts_sectors, by = "sector")  # attach full sector names
    -3 nancy2_melt <- melt(nancy2[, -1], id.vars = 3)
    -4 nancy2_melt$value <- as.numeric(nancy2_melt$value)
    -5 
    -6 # and lets plot some results
    -7 ggplot(nancy2_melt, aes(sector_name, value)) + geom_bar() + coord_flip() + facet_wrap(~variable, 
    -8     scales = "free", ncol = 1)
    - -

    center

    - -
    1 ## It looks like a lot of individual donations (the count facet) by
    -2 ## finance/insurance/realestate, but by amount, the most (by slim margin)
    -3 ## is from labor organizations.
    - -
    - -

    Or we may want to get a bio of a congressperson. Here we get Todd Akin of MO. And some twitter searching too? Indeed.

    - -
    1 out <- nyt_cg_memberbioroles("A000358")  # cool, lots of info, output cutoff for brevity
    -2 out[[3]][[1]][1:2]
    - -
    $member_id
    -[1] "A000358"
    -
    -$first_name
    -[1] "Todd"
    - -
    1 # we can get her twitter id from this bio, and search twitter using
    -2 # twitteR package
    -3 akintwitter <- out[[3]][[1]]$twitter_id
    -4 
    -5 # install.packages('twitteR')
    -6 library(twitteR)
    -7 tweets <- userTimeline(akintwitter, n = 100)
    -8 tweets[1:5]  # there's some gems in there no doubt
    - -
    [[1]]
    -[1] "RepToddAkin: Do you receive my Akin Alert e-newsletter?  Pick the issues you’d like to get updates on and sign up here!\nhttp://t.co/nZfiRjTF"
    -
    -[[2]]
    -[1] "RepToddAkin: If the 2001 &amp; 2003 tax policies expire, taxes will increase over $4 trillion in the next 10 years. America can't afford it. #stopthetaxhike"
    -
    -[[3]]
    -[1] "RepToddAkin: A govt agency's order shouldn't defy constitutional rights. I'm still working for #religiousfreedom and repealing the HHS mandate. #prolife"
    -
    -[[4]]
    -[1] "RepToddAkin: I am a cosponsor of the bill being considered today to limit abortions in DC. RT if you agree! #prolife http://t.co/Mesrjl0w"
    -
    -[[5]]
    -[1] "RepToddAkin: We need to #StopTheTaxHike. Raising taxes like the President wants would destroy more than 700,000 jobs. #4jobs http://t.co/KUTd0M7U"
    +2 http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd +{% endhighlight %} -
    -

    Get the .Rmd file used to create this post at my github account - or .md file.

+Cool, so I hope people find this post and package useful. Let me know what you think in comments below, or if you have code-specific comments or additions, go to [the GitHub repo for `rmetadata`](https://github.com/ropensci/rmetadata). In an upcoming post I will show an example of what you can do with `rmetadata` in terms of an actual research question. -
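As a rough sketch of chaining two of the verbs above (reusing the providers and arguments shown earlier, and not taken from the post itself), you could pull recent identifiers and then fetch the full record for each:

{% highlight r linenos %}
# Rough sketch: combine ListIdentifiers and GetRecord via rmetadata,
# assuming the md_listidentifiers() and md_getrecord() calls shown above.
library(plyr)

ids <- md_listidentifiers(provider = "pensoft", from = "2012-08-21")[[1]]
recs <- ldply(ids[1:3], function(x) md_getrecord(provider = "pensoft", identifier = x))
recs$dc.title   # titles of the first few recently deposited records
{% endhighlight %}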
    +********* +#### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-09-15-rmetadata.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-09-17-rmetadata.md). -

Written in Markdown, with help from knitr, and nice knitr highlighting/etc. in RStudio.

+#### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and nice knitr highlighting/etc. in [RStudio](http://rstudio.org/).
    diff --git a/_site/page29/index.html b/_site/page29/index.html index 91227c5434..5711005446 100644 --- a/_site/page29/index.html +++ b/_site/page29/index.html @@ -61,284 +61,408 @@

    Recology

    - - Getting ecology and evolution journal titles from R + + Getting data on your government

    - + -
    + ********* -

    So I want to mine some #altmetrics data for some research I'm thinking about doing. The steps would be:

+I created an R package a while back to interact with some APIs that serve up data on what our elected representatives are up to, including the [New York Times Congress API](http://developer.nytimes.com/), and the [Sunlight Labs API](http://services.sunlightlabs.com/). -
      -
    • Get journal titles for ecology and evolution journals.
    • -
• Get DOIs for all papers in all the above journal titles.
    • -
    • Get altmetrics data on each DOI.
    • -
    • Do some fancy analyses.
    • -
• Make some pretty figs.
    • -
    • Write up results.
    • -
    +What kinds of things can you do with `govdat`? Here are a few examples. -

It's early days, so just working on the first step. However, getting a list of journals in ecology and evolution is frustratingly hard. This turns out not to be that easy if you are (1) trying to avoid Thomson Reuters, and (2) want a machine-interface way to do it (read: API).

    +********* -

    Unfortunately, Mendeley's API does not have methods for getting a list of journals by field, or at least I don't know how to do it using their API. No worries though - Crossref comes to save the day. Here's my attempt at this using the Crossref OAI-PMH.

    +### How do the two major parties differ in the use of certain words (searches the congressional record using the Sunlight Labs Capitol Words API)? -
    +{% highlight r linenos %} +# install_github('govdat', 'sckott') +library(govdat) +library(reshape2) +library(ggplot2) -

    I wrote a little while loop to get journal titles from the Crossref OAI-PMH. This takes a while to run, but at least it works on my machine - hopefully yours too!

    +dems <- sll_cw_dates(phrase = "science", start_date = "1996-01-20", end_date = "2012-09-01", + granularity = "year", party = "D", printdf = TRUE) +repubs <- sll_cw_dates(phrase = "science", start_date = "1996-01-20", end_date = "2012-09-01", + granularity = "year", party = "R", printdf = TRUE) +df <- melt(rbind(data.frame(party = rep("D", nrow(dems)), dems), data.frame(party = rep("R", + nrow(repubs)), repubs))) +df$count <- as.numeric(df$count) -
     1 library(XML)
    - 2 library(RCurl)
    - 3 
    - 4 token <- "characters"  # define a iterator, also used for gettingn the resumptionToken
    - 5 nameslist <- list()  # define empty list to put joural titles in to
    - 6 while (is.character(token) == TRUE) {
    - 7     baseurl <- "http://oai.crossref.org/OAIHandler?verb=ListSets"
    - 8     if (token == "characters") {
    - 9         tok2 <- NULL
    -10     } else {
    -11         tok2 <- paste("&resumptionToken=", token, sep = "")
    -12     }
    -13     query <- paste(baseurl, tok2, sep = "")
    -14     crsets <- xmlToList(xmlParse(getURL(query)))
    -15     names <- as.character(sapply(crsets[[4]], function(x) x[["setName"]]))
    -16     nameslist[[token]] <- names
    -17     if (class(try(crsets[[2]]$.attrs[["resumptionToken"]])) == "try-error") {
    -18         stop("no more data")
    -19     } else token <- crsets[[2]]$.attrs[["resumptionToken"]]
    -20 }
    +ggplot(df, aes(yearmonth, count, colour = party, group = party)) + geom_line() + + scale_colour_manual(values = c("blue", "red")) + labs(y = "use of the word 'Science'") + + theme_bw(base_size = 18) + opts(axis.text.x = theme_text(size = 10), panel.grid.major = theme_blank(), + panel.grid.minor = theme_blank(), legend.position = c(0.2, 0.8)) +{% endhighlight %} -
    +![center](/public/img/unnamed-chunk-1.png) -

    Yay! Hopefully it worked if you tried it. Let's see how long the list of journal titles is.

    -
    1 sapply(nameslist, length)  # length of each list
    +********* -
                              characters c65ebc3f-b540-4672-9c00-f3135bf849e3 
    -                               10001                                10001 
    -6f61b343-a8f4-48f1-8297-c6f6909ca7f7 
    -                                6864 
    +### Let's get some data on donations to individual elected representatives. -
    1 allnames <- do.call(c, nameslist)  # combine to list
    -2 length(allnames)
    +{% highlight r linenos %} +library(plyr) -
    [1] 26866
    +# Let's get Nancy Pelosi's entity ID +sll_ts_aggregatesearch("Nancy Pelosi")[[1]] +{% endhighlight %} -
    -

Now, let's use some regex to pull out the journal titles that are likely ecology and evolutionary biology journals. The ^ symbol says "the string must start here". The \\s means whitespace. The [] lets you specify a set of letters you are looking for, e.g., [Ee] means capital E OR lowercase e. I threw in titles that had the words systematic and naturalist too. I also tried to trim any whitespace using the stringr package.

    -
     1 library(stringr)
    - 2 
    - 3 ecotitles <- as.character(allnames[str_detect(allnames, "^[Ee]cology|\\s[Ee]cology")])
    - 4 evotitles <- as.character(allnames[str_detect(allnames, "^[Ee]volution|\\s[Ee]volution")])
- 5 systtitles <- as.character(allnames[str_detect(allnames, "^[Ss]ystematic|\\s[Ss]ystematic")])
    - 6 naturalist <- as.character(allnames[str_detect(allnames, "[Nn]aturalist")])
    - 7 
    - 8 ecoevotitles <- unique(c(ecotitles, evotitles, systtitles, naturalist))  # combine to list
    - 9 ecoevotitles <- str_trim(ecoevotitles, side = "both")  # trim whitespace, if any
    -10 length(ecoevotitles)
    +{% highlight text %} +$name +[1] "Nancy Pelosi (D)" -
    [1] 188
    +$count_given +[1] 0 -
    1 # Just the first ten titles
    -2 ecoevotitles[1:10]
    +$firm_income +[1] 0 -
     [1] "Microbial Ecology in Health and Disease"           
    - [2] "Population Ecology"                                
    - [3] "Researches on Population Ecology"                  
    - [4] "Behavioral Ecology and Sociobiology"               
    - [5] "Microbial Ecology"                                 
    - [6] "Biochemical Systematics and Ecology"               
    - [7] "FEMS Microbiology Ecology"                         
    - [8] "Journal of Experimental Marine Biology and Ecology"
    - [9] "Applied Soil Ecology"                              
    -[10] "Forest Ecology and Management"                     
    +$count_lobbied +[1] 0 -
    +$seat +[1] "federal:house" -

    Get the .Rmd file used to create this post at my github account.

    +$total_received +[1] 13769274 -
    +$state +[1] "CA" -

Written in Markdown, with help from knitr, and nice knitr highlighting/etc. in RStudio.

    +$lobbying_firm +NULL -
    - -
    -

    - - Ecology unconference at ESA 2013 - -

    +$count_received +[1] 9852 + +$party +[1] "D" + +$total_given +[1] 0 + +$type +[1] "politician" + +$id +[1] "85ab2e74589a414495d18cc7a9233981" + +$non_firm_spending +[1] 0 + +$is_superpac +NULL + +{% endhighlight %} + + + +{% highlight r linenos %} + +# Her entity ID +sll_ts_aggregatesearch("Nancy Pelosi")[[1]]$id +{% endhighlight %} + + + +{% highlight text %} +[1] "85ab2e74589a414495d18cc7a9233981" +{% endhighlight %} + + + +{% highlight r linenos %} + +# And search for her top donors by sector +nancy <- ldply(sll_ts_aggregatetopsectors(sll_ts_aggregatesearch("Nancy Pelosi")[[1]]$id)) +nancy # but just abbreviations for sectors +{% endhighlight %} + + + +{% highlight text %} + sector count amount +1 F 1847 2698672.00 +2 P 981 2243050.00 +3 H 829 1412700.00 +4 K 1345 1409836.00 +5 Q 1223 1393154.00 +6 N 829 1166187.00 +7 B 537 932044.00 +8 W 724 760800.00 +9 Y 820 664926.00 +10 E 201 283575.00 +{% endhighlight %} + + + +{% highlight r linenos %} +data(sll_ts_sectors) # load sectors abbrevations data +nancy2 <- merge(nancy, sll_ts_sectors, by = "sector") # attach full sector names +nancy2_melt <- melt(nancy2[, -1], id.vars = 3) +nancy2_melt$value <- as.numeric(nancy2_melt$value) + +# and lets plot some results +ggplot(nancy2_melt, aes(sector_name, value)) + geom_bar() + coord_flip() + facet_wrap(~variable, + scales = "free", ncol = 1) +{% endhighlight %} + +![center](/public/img/unnamed-chunk-2.png) + +{% highlight r linenos %} + +## It looks like a lot of individual donations (the count facet) by +## finance/insurance/realestate, but by amount, the most (by slim margin) +## is from labor organizations. +{% endhighlight %} + + +********* + +### Or we may want to get a bio of a congressperson. Here we get Todd Akin of MO. And some twitter searching too? Indeed. + +{% highlight r linenos %} +out <- nyt_cg_memberbioroles("A000358") # cool, lots of info, output cutoff for brevity +out[[3]][[1]][1:2] +{% endhighlight %} - -
    -

So I heard many people say after or during the recent ESA conference in Portland that they really enjoyed the conversations more than listening to talks or looking at posters.

    +{% highlight text %} +$member_id +[1] "A000358" -

There was some chatter about doing an unconference associated with next year's ESA conference in Minneapolis. And Sandra Chung (@sandramchung) got on it and started a wiki that we can all contribute ideas to. The wiki is here: http://ecologyunconference.wikispaces.com/

    +$first_name +[1] "Todd" -

What is an unconference? The idea of an unconference is to have a community-organized meetup in which interactions among people are emphasized over the traditional lecture and poster format. For example, many sessions may just be organized around a single idea, and people attending have a discussion around the topic. The format can be decided by the community.

    +{% endhighlight %} -

    What will we do there? The broadest restriction is that topics should be about science that happens online. You may say, "Well, real ecology happens in the field!". Yes, but a lot of the products of ecology are put online, and increasingly the discussion of the practice of ecology happens online. Check out the Science Online 2012 website for a little taste of what we hope to achieve next year.

    -

    How do I get involved? Go to the wiki and start contributing: http://ecologyunconference.wikispaces.com/. There are already some suggestions for topics...Here's a screenshot of the ideas for Session Proposals page:

    -

(screenshot: ideas on the Session Proposals page of the ecology unconference wiki)

    +{% highlight r linenos %} -
    +# we can get her twitter id from this bio, and search twitter using +# twitteR package +akintwitter <- out[[3]][[1]]$twitter_id -

    Important! Use the #esaun13 hashtag to talk about this unconference on Twitter, G+, and app.net.

    +# install.packages('twitteR') +library(twitteR) +tweets <- userTimeline(akintwitter, n = 100) +tweets[1:5] # there's some gems in there no doubt +{% endhighlight %} -
    -

    Get the .Rmd file used to create this post at my github account.

    -
    +{% highlight text %} +[[1]] +[1] "RepToddAkin: Do you receive my Akin Alert e-newsletter? Pick the issues you’d like to get updates on and sign up here!\nhttp://t.co/nZfiRjTF" -

    Written in Markdown, with help from knitr.

    +[[2]] +[1] "RepToddAkin: If the 2001 & 2003 tax policies expire, taxes will increase over $4 trillion in the next 10 years. America can't afford it. #stopthetaxhike" + +[[3]] +[1] "RepToddAkin: A govt agency's order shouldn't defy constitutional rights. I'm still working for #religiousfreedom and repealing the HHS mandate. #prolife" + +[[4]] +[1] "RepToddAkin: I am a cosponsor of the bill being considered today to limit abortions in DC. RT if you agree! #prolife http://t.co/Mesrjl0w" + +[[5]] +[1] "RepToddAkin: We need to #StopTheTaxHike. Raising taxes like the President wants would destroy more than 700,000 jobs. #4jobs http://t.co/KUTd0M7U" + +{% endhighlight %} + + +********* + +### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-09-01-gov-dat.Rmd) - or [.md file](https://github.com/sckott/sckott.github.com/tree/master/_posts/2012-09-01-gov-dat.md). + +********* + +### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and nice knitr highlighting/etc. in in [RStudio](http://rstudio.org/).

    - - Making matrices with zeros and ones + + Getting ecology and evolution journal titles from R

    - + + + ********* + +So I want to mine some #altmetrics data for some research I'm thinking about doing. The steps would be: + ++ Get journal titles for ecology and evolution journals. ++ Get DOI's for all papers in all the above journal titles. ++ Get altmetrics data on each DOI. ++ Do some fancy analyses. ++ Make som pretty figs. ++ Write up results. + +It's early days, so jus working on the first step. However, getting a list of journals in ecology and evolution is frustratingly hard. This turns out to not be that easy if you are (1) trying to avoid [Thomson Reuters](http://thomsonreuters.com/), and (2) want a machine interface way to do it (read: API). + +Unfortunately, Mendeley's API does not have methods for getting a list of journals by field, or at least I don't know how to do it using [their API](http://apidocs.mendeley.com/). No worries though - [Crossref](http://crossref.org/) comes to save the day. Here's my attempt at this using the [Crossref OAI-PMH](http://help.crossref.org/#using_oai_pmh). + +********* + +### I wrote a little while loop to get journal titles from the Crossref OAI-PMH. This takes a while to run, but at least it works on my machine - hopefully yours too! + +{% highlight r linenos %} +library(XML) +library(RCurl) + +token <- "characters" # define a iterator, also used for gettingn the resumptionToken +nameslist <- list() # define empty list to put joural titles in to +while (is.character(token) == TRUE) { + baseurl <- "http://oai.crossref.org/OAIHandler?verb=ListSets" + if (token == "characters") { + tok2 <- NULL + } else { + tok2 <- paste("&resumptionToken=", token, sep = "") + } + query <- paste(baseurl, tok2, sep = "") + crsets <- xmlToList(xmlParse(getURL(query))) + names <- as.character(sapply(crsets[[4]], function(x) x[["setName"]])) + nameslist[[token]] <- names + if (class(try(crsets[[2]]$.attrs[["resumptionToken"]])) == "try-error") { + stop("no more data") + } else token <- crsets[[2]]$.attrs[["resumptionToken"]] +} +{% endhighlight %} + -
    +********* -

    So I was trying to figure out a fast way to make matrices with randomly allocated 0 or 1 in each cell of the matrix. I reached out on Twitter, and got many responses (thanks tweeps!).

    +### Yay! Hopefully it worked if you tried it. Let's see how long the list of journal titles is. -
    +{% highlight r linenos %} +sapply(nameslist, length) # length of each list +{% endhighlight %} -

    Here is the solution I came up with. See if you can tell why it would be slow.

    -
    1 mm <- matrix(0, 10, 5)
    -2 apply(mm, c(1, 2), function(x) sample(c(0, 1), 1))
    -
          [,1] [,2] [,3] [,4] [,5]
    - [1,]    1    0    1    0    1
    - [2,]    0    0    1    1    1
    - [3,]    0    0    0    0    1
    - [4,]    0    1    1    0    1
    - [5,]    0    1    1    1    1
    - [6,]    1    0    1    1    1
    - [7,]    0    1    0    1    0
    - [8,]    0    0    1    0    1
    - [9,]    1    0    1    1    1
    -[10,]    1    0    0    1    1
    +{% highlight text %} + characters c65ebc3f-b540-4672-9c00-f3135bf849e3 + 10001 10001 +6f61b343-a8f4-48f1-8297-c6f6909ca7f7 + 6864 +{% endhighlight %} + + + +{% highlight r linenos %} +allnames <- do.call(c, nameslist) # combine to list +length(allnames) +{% endhighlight %} + + + +{% highlight text %} +[1] 26866 +{% endhighlight %} + + +********* + + +### Now, let's use some `regex` to pull out the journal titles that are likely ecology and evolutionary biology journals. The `^` symbol says "the string must start here". The `\\s` means whitespace. The `[]` lets you specify a set of letters you are looking for, e.g., `[Ee]` means capital `E` *OR* lowercase `e`. I threw in titles that had the words systematic and natrualist too. Tried to trim any whitespace as well using the `stringr` package. -
    +{% highlight r linenos %} +library(stringr) -

    Ted Hart (@distribecology) replied first with:

    +ecotitles <- as.character(allnames[str_detect(allnames, "^[Ee]cology|\\s[Ee]cology")]) +evotitles <- as.character(allnames[str_detect(allnames, "^[Ee]volution|\\s[Ee]volution")]) +systtitles <- as.character(allnames[str_detect(allnames, "^[Ss]ystematic|\\s[Ss]systematic")]) +naturalist <- as.character(allnames[str_detect(allnames, "[Nn]aturalist")]) -
    1 matrix(rbinom(10 * 5, 1, 0.5), ncol = 5, nrow = 10)
    +ecoevotitles <- unique(c(ecotitles, evotitles, systtitles, naturalist)) # combine to list +ecoevotitles <- str_trim(ecoevotitles, side = "both") # trim whitespace, if any +length(ecoevotitles) +{% endhighlight %} -
          [,1] [,2] [,3] [,4] [,5]
    - [1,]    1    1    0    1    1
    - [2,]    1    0    0    1    0
    - [3,]    0    1    0    0    0
    - [4,]    0    0    1    0    0
    - [5,]    1    0    1    0    0
    - [6,]    0    0    0    0    1
    - [7,]    1    0    0    0    0
    - [8,]    0    1    0    1    0
    - [9,]    1    1    1    1    0
    -[10,]    0    1    1    0    0
    -
    -

    Next, David Smith (@revodavid) and Rafael Maia (@hylospar) came up with about the same solution.

    +{% highlight text %} +[1] 188 +{% endhighlight %} -
    1 m <- 10
    -2 n <- 5
    -3 matrix(sample(0:1, m * n, replace = TRUE), m, n)
    -
          [,1] [,2] [,3] [,4] [,5]
    - [1,]    0    0    0    0    1
    - [2,]    0    0    0    0    0
    - [3,]    0    1    1    0    1
    - [4,]    1    0    0    1    0
    - [5,]    0    0    0    0    1
    - [6,]    1    0    1    1    1
    - [7,]    1    1    1    1    0
    - [8,]    0    0    0    1    1
    - [9,]    1    0    0    0    1
    -[10,]    0    1    0    1    1
    -
    +{% highlight r linenos %} -

    Then there was the solution by Luis Apiolaza (@zentree).

    +# Just the first ten titles +ecoevotitles[1:10] +{% endhighlight %} -
    1 m <- 10
    -2 n <- 5
    -3 round(matrix(runif(m * n), m, n))
    -
          [,1] [,2] [,3] [,4] [,5]
    - [1,]    0    1    1    0    0
    - [2,]    1    0    1    1    0
    - [3,]    1    0    1    0    0
    - [4,]    1    0    0    0    1
    - [5,]    1    0    1    1    0
    - [6,]    1    0    0    0    0
    - [7,]    1    0    0    0    0
    - [8,]    1    1    1    0    0
    - [9,]    0    0    0    0    1
    -[10,]    1    0    0    1    1
    -
    +{% highlight text %} + [1] "Microbial Ecology in Health and Disease" + [2] "Population Ecology" + [3] "Researches on Population Ecology" + [4] "Behavioral Ecology and Sociobiology" + [5] "Microbial Ecology" + [6] "Biochemical Systematics and Ecology" + [7] "FEMS Microbiology Ecology" + [8] "Journal of Experimental Marine Biology and Ecology" + [9] "Applied Soil Ecology" +[10] "Forest Ecology and Management" +{% endhighlight %} -

Last, a solution was proposed using RcppArmadillo; I couldn't get it to work on my machine, but here is the function anyway in case someone else can.

    +********* -
    1 library(inline)
    -2 library(RcppArmadillo)
    -3 f <- cxxfunction(body = "return wrap(arma::randu(5,10));", plugin = "RcppArmadillo")
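If the compile does work for you, a quick way to turn that into a 0/1 matrix is to round the uniform draws it returns; this is a hedged sketch that assumes `f()` from above compiled successfully on your system:

```r
# hedged sketch: f() gives a 5 x 10 matrix of Uniform(0,1) draws via RcppArmadillo;
# rounding those draws yields a matrix of 0s and 1s
m01 <- round(f())
m01
```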
    +### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-08-30-get-ecoevo-journal-titles.Rmd). -
    +********* -

    And here is the comparison of system.time for each solution.

    +### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and nice knitr highlighting/etc. in in [RStudio](http://rstudio.org/). + +
    + +
    +

    + + Ecology unconference at ESA 2013 + +

    + + -
    1 mm <- matrix(0, 10, 5)
    -2 m <- 10
    -3 n <- 5
    -4 
    -5 system.time(replicate(1000, apply(mm, c(1, 2), function(x) sample(c(0, 1), 1))))  # @recology_
    + ********* -
       user  system elapsed 
    -  0.470   0.002   0.471 
+So I heard many people say after or during the recent ESA conference in Portland that they really enjoyed the conversations more than listening to talks or looking at posters. -
    1 system.time(replicate(1000, matrix(rbinom(10 * 5, 1, 0.5), ncol = 5, nrow = 10)))  # @distribecology
+There was some chatter about doing an unconference associated with next year's ESA conference in Minneapolis. And Sandra Chung (@sandramchung) got on it and started a wiki that we can all contribute ideas to. The wiki is here: [http://ecologyunconference.wikispaces.com/](http://ecologyunconference.wikispaces.com/) -
       user  system elapsed 
    -  0.014   0.000   0.015 
+What is an unconference? The idea of an unconference is to have a community-organized meetup in which interactions among people are emphasized over the traditional lecture and poster format. For example, many sessions may just be organized around a single idea, and people attending have a discussion around the topic. The format can be decided by the community. -
    1 system.time(replicate(1000, matrix(sample(0:1, m * n, replace = TRUE), m, n)))  # @revodavid & @hylospar
    +What will we do there? The broadest restriction is that topics should be about science that happens online. You may say, "Well, real ecology happens in the field!". Yes, but a lot of the products of ecology are put online, and increasingly the discussion of the practice of ecology happens online. Check out the [Science Online 2012 website](http://scienceonline2012.com/) for a little taste of what we hope to achieve next year. -
       user  system elapsed 
    -  0.015   0.000   0.014 
    +How do I get involved? Go to the wiki and start contributing: [http://ecologyunconference.wikispaces.com/](http://ecologyunconference.wikispaces.com/). There are already some suggestions for topics...Here's a screenshot of the ideas for Session Proposals page: -
    1 system.time(replicate(1000, round(matrix(runif(m * n), m, n)), ))  # @zentree
    +![ecologyunconf](/public/img/ecologyunconf.png) -
       user  system elapsed 
    -  0.014   0.000   0.014 
    +********* -

If you want to take the time to learn C++ or already know it, the RcppArmadillo option would likely be the fastest, but I think (IMO) for many scientists, especially ecologists, we probably don't already know C++, so we'll stick to the next fastest options.

    +### Important! Use the #esaun13 hashtag to talk about this unconference on Twitter, G+, and app.net. -
    +********* -

    Get the .Rmd file used to create this post at my github account.

    +### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-08-30-making-matrices.Rmd). -
    +********* -

Written in Markdown, with help from knitr, and nice knitr highlighting/etc. in RStudio.

    +### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/).
    diff --git a/_site/page3/index.html b/_site/page3/index.html index 8994603a6a..8aa9a5b021 100644 --- a/_site/page3/index.html +++ b/_site/page3/index.html @@ -59,6 +59,195 @@

    Recology

      +
    +

    + + oai - an OAI-PMH client + +

    + + + + `oai` is a general purpose client to work with any 'OAI-PMH' service. The 'OAI-PMH' protocol is described at [http://www.openarchives.org/OAI/openarchivesprotocol.html](http://www.openarchives.org/OAI/openarchivesprotocol.html). The main functions follow the OAI-PMH verbs: + +* `GetRecord` +* `Identify` +* `ListIdentifiers` +* `ListMetadataFormats` +* `ListRecords` +* `ListSets` + +The repo is at [https://github.com/sckott/oai](https://github.com/sckott/oai) + +I will be using this in a number of packages I maintain that use OAI-PMH data services. If you try it, let me know what you think. + +This package is heading to rOpenSci soon: [https://github.com/ropensci/onboarding/issues/19](https://github.com/ropensci/onboarding/issues/19) + +Here's a few usage examples: + +## Install + +Is on CRAN now, but binaries may not be available yet. + + +```r +install.packages("oai") +``` + +Or install development version from GitHub + + +```r +devtools::install_github("sckott/oai") +``` + +Load `oai` + + +```r +library("oai") +``` + +## Identify + + +```r +id("http://oai.datacite.org/oai") +#> repositoryName baseURL protocolVersion +#> 1 DataCite MDS http://oai.datacite.org/oai 2.0 +#> adminEmail earliestDatestamp deletedRecord +#> 1 admin@datacite.org 2011-01-01T00:00:00Z persistent +#> granularity compression compression.1 +#> 1 YYYY-MM-DDThh:mm:ssZ gzip deflate +#> description +#> 1 oaioai.datacite.org:oai:oai.datacite.org:12425 +``` + +## ListIdentifiers + + +```r +list_identifiers(from = '2011-05-01T', until = '2011-09-01T') +#> 925 X 6 +#> +#> identifier datestamp setSpec setSpec.1 +#> 1 oai:oai.datacite.org:32153 2011-06-08T08:57:11Z TIB TIB.WDCC +#> 2 oai:oai.datacite.org:32200 2011-06-20T08:12:41Z TIB TIB.DAGST +#> 3 oai:oai.datacite.org:32220 2011-06-28T14:11:08Z TIB TIB.DAGST +#> 4 oai:oai.datacite.org:32241 2011-06-30T13:24:45Z TIB TIB.DAGST +#> 5 oai:oai.datacite.org:32255 2011-07-01T12:09:24Z TIB TIB.DAGST +#> 6 oai:oai.datacite.org:32282 2011-07-05T09:08:10Z TIB TIB.DAGST +#> 7 oai:oai.datacite.org:32309 2011-07-06T12:30:54Z TIB TIB.DAGST +#> 8 oai:oai.datacite.org:32310 2011-07-06T12:42:32Z TIB TIB.DAGST +#> 9 oai:oai.datacite.org:32325 2011-07-07T11:17:46Z TIB TIB.DAGST +#> 10 oai:oai.datacite.org:32326 2011-07-07T11:18:47Z TIB TIB.DAGST +#> .. ... ... ... ... +#> Variables not shown: setSpec.2 (chr), setSpec.3 (chr) +``` + +## Count Identifiers + + +```r +count_identifiers() +#> url count +#> 1 http://oai.datacite.org/oai 6350706 +``` + +## ListRecords + + +```r +list_records(from = '2011-05-01T', until = '2011-08-15T') +#> 126 X 46 +#> +#> identifier datestamp setSpec setSpec.1 +#> 1 oai:oai.datacite.org:32153 2011-06-08T08:57:11Z TIB TIB.WDCC +#> 2 oai:oai.datacite.org:32200 2011-06-20T08:12:41Z TIB TIB.DAGST +#> 3 oai:oai.datacite.org:32220 2011-06-28T14:11:08Z TIB TIB.DAGST +#> 4 oai:oai.datacite.org:32241 2011-06-30T13:24:45Z TIB TIB.DAGST +#> 5 oai:oai.datacite.org:32255 2011-07-01T12:09:24Z TIB TIB.DAGST +#> 6 oai:oai.datacite.org:32282 2011-07-05T09:08:10Z TIB TIB.DAGST +#> 7 oai:oai.datacite.org:32309 2011-07-06T12:30:54Z TIB TIB.DAGST +#> 8 oai:oai.datacite.org:32310 2011-07-06T12:42:32Z TIB TIB.DAGST +#> 9 oai:oai.datacite.org:32325 2011-07-07T11:17:46Z TIB TIB.DAGST +#> 10 oai:oai.datacite.org:32326 2011-07-07T11:18:47Z TIB TIB.DAGST +#> .. ... ... ... ... 
+#> Variables not shown: title (chr), creator (chr), creator.1 (chr), +#> creator.2 (chr), creator.3 (chr), creator.4 (chr), creator.5 (chr), +#> creator.6 (chr), creator.7 (chr), publisher (chr), date (chr), +#> identifier.2 (chr), identifier.1 (chr), subject (chr), description +#> (chr), description.1 (chr), contributor (chr), language (chr), type +#> (chr), type.1 (chr), format (chr), format.1 (chr), rights (chr), +#> subject.1 (chr), relation (chr), subject.2 (chr), subject.3 (chr), +#> subject.4 (chr), setSpec.2 (chr), setSpec.3 (chr), format.2 (chr), +#> subject.5 (chr), subject.6 (chr), subject.7 (chr), description.2 +#> (chr), description.3 (chr), description.4 (chr), description.5 (chr), +#> title.1 (chr), relation.1 (chr), relation.2 (chr), contributor.1 +#> (chr) +``` + +## GetRecords + + +```r +get_records(c("oai:oai.datacite.org:32255", "oai:oai.datacite.org:32325")) +#> 2 X 23 +#> +#> identifier datestamp setSpec setSpec.1 +#> 1 oai:oai.datacite.org:32255 2011-07-01T12:09:24Z TIB TIB.DAGST +#> 2 oai:oai.datacite.org:32325 2011-07-07T11:17:46Z TIB TIB.DAGST +#> Variables not shown: title (chr), creator (chr), creator.1 (chr), +#> creator.2 (chr), creator.3 (chr), publisher (chr), date (chr), +#> identifier.1 (chr), subject (chr), subject.1 (chr), description +#> (chr), description.1 (chr), contributor (chr), language (chr), type +#> (chr), type.1 (chr), format (chr), format.1 (chr), rights (chr) +``` + +## List MetadataFormats + + +```r +list_metadataformats(id = "oai:oai.datacite.org:32348") +#> $`oai:oai.datacite.org:32348` +#> metadataPrefix +#> 1 oai_dc +#> 2 datacite +#> 3 oai_datacite +#> schema +#> 1 http://www.openarchives.org/OAI/2.0/oai_dc.xsd +#> 2 http://schema.datacite.org/meta/nonexistant/nonexistant.xsd +#> 3 http://schema.datacite.org/oai/oai-1.0/oai.xsd +#> metadataNamespace +#> 1 http://www.openarchives.org/OAI/2.0/oai_dc/ +#> 2 http://datacite.org/schema/nonexistant +#> 3 http://schema.datacite.org/oai/oai-1.0/ +``` + +## List Sets + + +```r +list_sets("http://oai.datacite.org/oai") +#> 1227 X 2 +#> +#> setSpec +#> 1 REFQUALITY +#> 2 ANDS +#> 3 ANDS.REFQUALITY +#> 4 ANDS.CENTRE-1 +#> 5 ANDS.CENTRE-1.REFQUALITY +#> 6 ANDS.CENTRE-2 +#> 7 ANDS.CENTRE-2.REFQUALITY +#> 8 ANDS.CENTRE-3 +#> 9 ANDS.CENTRE-3.REFQUALITY +#> 10 ANDS.CENTRE-5 +#> .. ... +#> Variables not shown: setName (chr) +``` + +
    +

    @@ -68,426 +257,511 @@

    -

Finally, we got fulltext up on CRAN - our first commit was May last year. fulltext is a package to facilitate text mining. It focuses on open access journals. This package makes it easier to search for articles, download those articles in full text if available, convert pdf format to plain text, and extract text chunks for visualization/analysis. We are planning to add bits for analysis in future versions. We've been working on this package for a while now. It has a lot of moving parts and package dependencies, so it took a while to get a first usable version.

    - -

    The tasks facilitated by fulltext in bullet form:

    - -
      -
    • Search - search for articles
    • -
    • Retrieve - get full text
    • -
    • Convert - convert from format X to Y
    • -
    • Text - if needed, get text from pdfs/etc.
    • -
    • Extract - pull out the bits of articles that you want
    • -
    - -

    I won't be surprised if users uncover a lot of bugs in this package given the huge number of publishers/journals users want to get literature data from, and the surely wide diversity of use cases. But I thought it was important to get out a first version to get feedback on the user interface, and gather use cases.

    - -

We hope that this package can help bring text-mining to the masses - making it easy for anyone to do, not just text-mining experts.

    - -

If you have any feedback, please do get in touch in the issue tracker for fulltext at https://github.com/ropensci/fulltext/issues. If you have use-case thoughts, the rOpenSci discussion forum might be a good place to go.

    - -

    Let's kick the tires, shall we?

    - -

    Install

    - -

Will be on CRAN soon, but it isn't there yet as of the morning (PDT) of 2015-08-07.

    -
    install.packages("fulltext")
    -# if binaries not avail. yet on your favorite CRAN mirror
    -install.packages("https://cran.rstudio.com/src/contrib/fulltext_0.1.0.tar.gz", repos = NULL, type = "source")
    -
    -

    Or install development version from GitHub

    -
    devtools::install_github("ropensci/fulltext")
    -
    -

    Load fulltext

    -
    library("fulltext")
    -
    -

    Search for articles

    - -

    Currently, there are hooks for searching for articles from PLOS, BMC, Crossref, Entrez, arXiv, and BioRxiv. We'll add more in the future, but that does cover a lot of articles, especially given inclusion of Crossref (which mints most DOIs) and Entrez (which houses PMC and Pubmed).

    - -

    An example: Search for the term ecology in PLOS journals.

    -
    (res1 <- ft_search(query = 'ecology', from = 'plos'))
    -#> Query:
    -#>   [ecology] 
    -#> Found:
    -#>   [PLoS: 28589; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] 
    -#> Returned:
    -#>   [PLoS: 10; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0]
    -
    -
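The same interface points at the other sources too; here is a minimal sketch (assuming `'crossref'` and `'entrez'` are the `from` values for those back-ends, and that the services are reachable):

```r
# hedged sketch: run the same query against a couple of the other back-ends
ft_search(query = 'ecology', from = 'crossref')
ft_search(query = 'ecology', from = 'entrez')
```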

    Each publisher/search-engine has a slot with metadata and data, saying how many articles were found and how many were returned. We can dig into what PLOS gave us:

    -
    res1$plos
    -#> Query: [ecology] 
    -#> Records found, returned: [28589, 10] 
    -#> License: [CC-BY] 
    -#>                                                         id
    -#> 1                             10.1371/journal.pone.0059813
    -#> 2                             10.1371/journal.pone.0001248
    -#> 3  10.1371/annotation/69333ae7-757a-4651-831c-f28c5eb02120
    -#> 4                             10.1371/journal.pone.0080763
    -#> 5                             10.1371/journal.pone.0102437
    -#> 6                             10.1371/journal.pone.0017342
    -#> 7                             10.1371/journal.pone.0091497
    -#> 8                             10.1371/journal.pone.0092931
    -#> 9  10.1371/annotation/28ac6052-4f87-4b88-a817-0cd5743e83d6
    -#> 10                            10.1371/journal.pcbi.1003594
    -
    -

    For each of the data sources to search on you can pass in additional options (basically, you can use the query parameters in the functions that hit each service). Here, we can modify our search to PLOS by requesting a particular set of fields with the fl parameter (PLOS uses a Solr backed search engine, and fl is short for fields in Solr land):

    -
    ft_search(query = 'ecology', from = 'plos', plosopts = list(
    -   fl = c('id','author','eissn','journal','counter_total_all','alm_twitterCount')))
    -#> Query:
    -#>   [ecology] 
    -#> Found:
    -#>   [PLoS: 28589; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] 
    -#> Returned:
    -#>   [PLoS: 10; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0]
    -
    -
    -

    Note that PLOS is a bit unique in allowing you to request specific parts of articles. Other sources in ft_search() don't let you do that.

    -
    - -

    Get full text

    - -

After you've found the set of articles you want to get full text for, we can use the results from ft_search() to grab full text. ft_get() accepts a character vector or list of DOIs (or PMC IDs if fetching from Entrez), or the output of ft_search().
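For instance, the character-vector form might look like the sketch below (not run here; it reuses two DOIs from the search results above and assumes `from = "plos"` works for `ft_get()` the same way `from = "arxiv"` does later in this post):

```r
# hedged sketch: fetch full text straight from DOIs instead of ft_search() output
dois <- c("10.1371/journal.pone.0059813", "10.1371/journal.pone.0001248")
ft_get(dois, from = "plos")
```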

    -
    (out <- ft_get(res1))
    -#> [Docs] 8 
    -#> [Source] R session  
    -#> [IDs] 10.1371/journal.pone.0059813 10.1371/journal.pone.0001248
    -#>      10.1371/journal.pone.0080763 10.1371/journal.pone.0102437
    -#>      10.1371/journal.pone.0017342 10.1371/journal.pone.0091497
    -#>      10.1371/journal.pone.0092931 10.1371/journal.pcbi.1003594 ...
    -
    -

We got eight articles in full text in the result. We didn't get 10, even though 10 were returned from ft_search(), because PLOS often returns records for annotations, that is, comments on articles, which we auto-sieve out within ft_get().

    - -

Dig into the PLOS data

    -
    out$plos
    -#> $found
    -#> [1] 8
    -#> 
    -#> $dois
    -#> [1] "10.1371/journal.pone.0059813" "10.1371/journal.pone.0001248"
    -#> [3] "10.1371/journal.pone.0080763" "10.1371/journal.pone.0102437"
    -#> [5] "10.1371/journal.pone.0017342" "10.1371/journal.pone.0091497"
    -#> [7] "10.1371/journal.pone.0092931" "10.1371/journal.pcbi.1003594"
    -#> 
    -#> $data
    -#> $data$backend
    -#> NULL
    -#> 
    -#> $data$path
    -#> [1] "session"
    -#> 
    -#> $data$data
    -#> 8 full-text articles retrieved 
    -#> Min. Length: 3828 - Max. Length: 104702 
    -#> DOIs: 10.1371/journal.pone.0059813 10.1371/journal.pone.0001248
    -#>   10.1371/journal.pone.0080763 10.1371/journal.pone.0102437
    -#>   10.1371/journal.pone.0017342 10.1371/journal.pone.0091497
    -#>   10.1371/journal.pone.0092931 10.1371/journal.pcbi.1003594 ... 
    -#> 
    -#> NOTE: extract xml strings like output['<doi>']
    -#> 
    -#> $opts
    -#> $opts$doi
    -#> [1] "10.1371/journal.pone.0059813" "10.1371/journal.pone.0001248"
    -#> [3] "10.1371/journal.pone.0080763" "10.1371/journal.pone.0102437"
    -#> [5] "10.1371/journal.pone.0017342" "10.1371/journal.pone.0091497"
    -#> [7] "10.1371/journal.pone.0092931" "10.1371/journal.pcbi.1003594"
    -#> 
    -#> $opts$callopts
    -#> list()
    -
    -

    Dig in further to get to one of the articles in XML format

    -
    library("xml2")
    -xml2::read_xml(out$plos$data$data$`10.1371/journal.pone.0059813`)
    -#> {xml_document}
    -#> <article>
    -#> [1] <front>\n<journal-meta>\n<journal-id journal-id-type="nlm-ta">PLoS O ...
    -#> [2] <body>\n  <sec id="s1">\n<title>Introduction</title>\n<p>Ecologists  ...
    -#> [3] <back>\n<ack>\n<p>Curtis Flather, Mark Burgman, Leon Blaustein, Yaac ...
    -
    -

    Now with the xml, you can dig into whatever you like, e.g., using xml2 or rvest.
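For example, here is a minimal sketch pulling the abstract and body paragraphs out of the article read above (it relies on the JATS-style tags visible in the printed XML, like `body` and `p`; the `abstract` tag is an assumption):

```r
# hedged sketch: XPath queries against the PLOS article XML read above
library("xml2")
doc <- read_xml(out$plos$data$data$`10.1371/journal.pone.0059813`)
xml_text(xml_find_all(doc, "//abstract"))        # abstract text
head(xml_text(xml_find_all(doc, "//body//p")))   # first few body paragraphs
```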

    - -

    Extract text from pdfs

    - -

    Ideally for text mining you have access to XML or other text based formats. However, sometimes you only have access to PDFs. In this case you want to extract text from PDFs. fulltext can help with that.

    - -

You can extract text from any PDF given a file path, like:

    -
    path <- system.file("examples", "example1.pdf", package = "fulltext")
    -ft_extract(path)
    -#> <document>/Library/Frameworks/R.framework/Versions/3.2/Resources/library/fulltext/examples/example1.pdf
    -#>   Pages: 18
    -#>   Title: Suffering and mental health among older people living in nursing homes---a mixed-methods study
    -#>   Producer: pdfTeX-1.40.10
    -#>   Creation date: 2015-07-17
    -
    -

    Let's search for articles from arXiv, a preprint service. Here, get pdf from an article with ID cond-mat/9309029:

    -
    res <- ft_get('cond-mat/9309029', from = "arxiv")
    -res2 <- ft_extract(res)
    -res2$arxiv$data
    -#> $backend
    -#> NULL
    -#> 
    -#> $path
    -#> $path$`cond-mat/9309029`
    -#> [1] "~/.fulltext/cond-mat_9309029.pdf"
    -#> 
    -#> 
    -#> $data
    -#> $data[[1]]
    -#> <document>/Users/sacmac/.fulltext/cond-mat_9309029.pdf
    -#>   Pages: 14
    -#>   Title: arXiv:cond-mat/9309029v8  26 Jan 1994
    -#>   Producer: GPL Ghostscript SVN PRE-RELEASE 8.62
    -#>   Creation date: 2008-02-06
    -
    -

    And a short snippet of the full text

    -
    res2$arxiv$data$data[[1]]$data
    -#> "arXiv:cond-mat/9309029v8 26 Jan 1994, , FERMILAB-PUB-93/15-T March 1993, Revised:
    -#> January 1994, The Thermodynamics and Economics of Waste, Dallas C. Kennedy, Research
    -#> Associate, Fermi National Accelerator Laboratory, P.O. Box 500 MS106, Batavia, Illinois
    -#> 60510 USA, Abstract, The increasingly relevant problem of natural resource use and
    -#> waste production, disposal, and reuse is examined from several viewpoints: economic,
    -#> technical, and thermodynamic. Alternative economies are studied, with emphasis on
    -#> recycling of waste to close the natural resource cycle. The physical nature of human
    -#> economies and constraints on recycling and energy efficiency are stated in terms
    -#> ..."
    -
    -

    Extract text chunks

    - -

    We have a few functions to help you pull out certain parts of an article. For example, perhaps you want to get just the authors from your articles, or just the abstracts.

    - -

    Here, we'll search for some PLOS articles, then get their full text, then extract various parts of each article with chunks().

    -
    res <- ft_search(query = "ecology", from = "plos")
    -(x <- ft_get(res))
    -#> [Docs] 8 
    -#> [Source] R session  
    -#> [IDs] 10.1371/journal.pone.0059813 10.1371/journal.pone.0001248
    -#>      10.1371/journal.pone.0080763 10.1371/journal.pone.0102437
    -#>      10.1371/journal.pone.0017342 10.1371/journal.pone.0091497
    -#>      10.1371/journal.pone.0092931 10.1371/journal.pcbi.1003594 ...
    -
    -

    Extract DOIs

    -
    x %>% chunks("doi")
    -#> $plos
    -#> $plos$`10.1371/journal.pone.0059813`
    -#> $plos$`10.1371/journal.pone.0059813`$doi
    -#> [1] "10.1371/journal.pone.0059813"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0001248`
    -#> $plos$`10.1371/journal.pone.0001248`$doi
    -#> [1] "10.1371/journal.pone.0001248"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0080763`
    -#> $plos$`10.1371/journal.pone.0080763`$doi
    -#> [1] "10.1371/journal.pone.0080763"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0102437`
    -#> $plos$`10.1371/journal.pone.0102437`$doi
    -#> [1] "10.1371/journal.pone.0102437"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0017342`
    -#> $plos$`10.1371/journal.pone.0017342`$doi
    -#> [1] "10.1371/journal.pone.0017342"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0091497`
    -#> $plos$`10.1371/journal.pone.0091497`$doi
    -#> [1] "10.1371/journal.pone.0091497"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0092931`
    -#> $plos$`10.1371/journal.pone.0092931`$doi
    -#> [1] "10.1371/journal.pone.0092931"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pcbi.1003594`
    -#> $plos$`10.1371/journal.pcbi.1003594`$doi
    -#> [1] "10.1371/journal.pcbi.1003594"
    -
    -

    Extract DOIs and categories

    -
    x %>% chunks(c("doi","categories"))
    -#> $plos
    -#> $plos$`10.1371/journal.pone.0059813`
    -#> $plos$`10.1371/journal.pone.0059813`$doi
    -#> [1] "10.1371/journal.pone.0059813"
    -#> 
    -#> $plos$`10.1371/journal.pone.0059813`$categories
    -#>  [1] "Research Article"                 "Biology"                         
    -#>  [3] "Ecology"                          "Community ecology"               
    -#>  [5] "Species interactions"             "Science policy"                  
    -#>  [7] "Research assessment"              "Research monitoring"             
    -#>  [9] "Research funding"                 "Government funding of science"   
    -#> [11] "Research laboratories"            "Science policy and economics"    
    -#> [13] "Science and technology workforce" "Careers in research"             
    -#> [15] "Social and behavioral sciences"   "Sociology"                       
    -#> [17] "Sociology of knowledge"          
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0001248`
    -#> $plos$`10.1371/journal.pone.0001248`$doi
    -#> [1] "10.1371/journal.pone.0001248"
    -#> 
    -#> $plos$`10.1371/journal.pone.0001248`$categories
    -#> [1] "Research Article"             "Ecology"                     
    -#> [3] "Ecology/Ecosystem Ecology"    "Ecology/Evolutionary Ecology"
    -#> [5] "Ecology/Theoretical Ecology" 
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0080763`
    -#> $plos$`10.1371/journal.pone.0080763`$doi
    -#> [1] "10.1371/journal.pone.0080763"
    -#> 
    -#> $plos$`10.1371/journal.pone.0080763`$categories
    -#>  [1] "Research Article"     "Biology"              "Ecology"             
    -#>  [4] "Autecology"           "Behavioral ecology"   "Community ecology"   
    -#>  [7] "Evolutionary ecology" "Population ecology"   "Evolutionary biology"
    -#> [10] "Behavioral ecology"   "Evolutionary ecology" "Population biology"  
    -#> [13] "Population ecology"  
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0102437`
    -#> $plos$`10.1371/journal.pone.0102437`$doi
    -#> [1] "10.1371/journal.pone.0102437"
    -#> 
    -#> $plos$`10.1371/journal.pone.0102437`$categories
    -#>  [1] "Research Article"                  
    -#>  [2] "Biology and life sciences"         
    -#>  [3] "Biogeography"                      
    -#>  [4] "Ecology"                           
    -#>  [5] "Ecosystems"                        
    -#>  [6] "Ecosystem engineering"             
    -#>  [7] "Ecosystem functioning"             
    -#>  [8] "Industrial ecology"                
    -#>  [9] "Spatial and landscape ecology"     
    -#> [10] "Urban ecology"                     
    -#> [11] "Computer and information sciences" 
    -#> [12] "Geoinformatics"                    
    -#> [13] "Spatial analysis"                  
    -#> [14] "Earth sciences"                    
    -#> [15] "Geography"                         
    -#> [16] "Human geography"                   
    -#> [17] "Cultural geography"                
    -#> [18] "Social geography"                  
    -#> [19] "Ecology and environmental sciences"
    -#> [20] "Conservation science"              
    -#> [21] "Environmental protection"          
    -#> [22] "Nature-society interactions"       
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0017342`
    -#> $plos$`10.1371/journal.pone.0017342`$doi
    -#> [1] "10.1371/journal.pone.0017342"
    -#> 
    -#> $plos$`10.1371/journal.pone.0017342`$categories
    -#>  [1] "Research Article"     "Biology"              "Ecology"             
    -#>  [4] "Community ecology"    "Community assembly"   "Community structure" 
    -#>  [7] "Niche construction"   "Ecological metrics"   "Species diversity"   
    -#> [10] "Species richness"     "Biodiversity"         "Biogeography"        
    -#> [13] "Population ecology"   "Mathematics"          "Statistics"          
    -#> [16] "Biostatistics"        "Statistical theories" "Ecology"             
    -#> [19] "Mathematics"         
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0091497`
    -#> $plos$`10.1371/journal.pone.0091497`$doi
    -#> [1] "10.1371/journal.pone.0091497"
    -#> 
    -#> $plos$`10.1371/journal.pone.0091497`$categories
    -#> [1] "Correction"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pone.0092931`
    -#> $plos$`10.1371/journal.pone.0092931`$doi
    -#> [1] "10.1371/journal.pone.0092931"
    -#> 
    -#> $plos$`10.1371/journal.pone.0092931`$categories
    -#> [1] "Correction"
    -#> 
    -#> 
    -#> $plos$`10.1371/journal.pcbi.1003594`
    -#> $plos$`10.1371/journal.pcbi.1003594`$doi
    -#> [1] "10.1371/journal.pcbi.1003594"
    -#> 
    -#> $plos$`10.1371/journal.pcbi.1003594`$categories
    -#> [1] "Research Article"          "Biology and life sciences"
    -#> [3] "Computational biology"     "Microbiology"             
    -#> [5] "Theoretical biology"
    -
    -

tabularize attempts to help you put the data that comes out of chunks() into a data.frame, which we all know and love.

    -
    x %>% chunks(c("doi", "history")) %>% tabularize()
    -#> $plos
    -#>                            doi history.received history.accepted
    -#> 1 10.1371/journal.pone.0059813       2012-09-16       2013-02-19
    -#> 2 10.1371/journal.pone.0001248       2007-07-02       2007-11-06
    -#> 3 10.1371/journal.pone.0080763       2013-08-15       2013-10-16
    -#> 4 10.1371/journal.pone.0102437       2013-11-27       2014-06-19
    -#> 5 10.1371/journal.pone.0017342       2010-08-24       2011-01-31
    -#> 6 10.1371/journal.pone.0091497             <NA>             <NA>
    -#> 7 10.1371/journal.pone.0092931             <NA>             <NA>
    -#> 8 10.1371/journal.pcbi.1003594       2014-01-09       2014-03-14
    -
    -

    Bring it all together

    - -

    With the pieces above, let's see what it looks like all in one go. Here, we'll search for articles on climate change, then visualize word usage in those articles.

    - -

    Search

    -
    (out <- ft_search(query = 'climate change', from = 'plos', limit = 100))
    -#> Query:
    -#>   [climate change] 
    -#> Found:
    -#>   [PLoS: 11737; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] 
    -#> Returned:
    -#>   [PLoS: 100; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0]
    -
    -

    Get full text

    -
    (texts <- ft_get(out))
    -#> [Docs] 99 
    -#> [Source] R session  
    -#> [IDs] 10.1371/journal.pone.0054839 10.1371/journal.pone.0045683
    -#>      10.1371/journal.pone.0050182 10.1371/journal.pone.0118489
    -#>      10.1371/journal.pone.0053646 10.1371/journal.pone.0015103
    -#>      10.1371/journal.pone.0008320 10.1371/journal.pmed.1001227
    -#>      10.1371/journal.pmed.1001374 10.1371/journal.pone.0097480 ...
    -
    -

    Because PLOS returns XML, we don't need to do a PDF extraction step. However, if we got full text from arXiv or bioRxiv, we'd need to extract from PDFs first.
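That extra step would look roughly like the arXiv example earlier in the post; a sketch (assuming PDF-returning results, e.g. from arXiv):

```r
# hedged sketch: only needed when a source hands back PDFs (e.g. arXiv/bioRxiv)
pdfs <- ft_get('cond-mat/9309029', from = "arxiv")
txts <- ft_extract(pdfs)
```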

    - -

    Pull out chunks

    -
    abs <- texts %>% chunks("abstract")
    -
    -

    Let's pull out just the text

    -
    abs <- lapply(abs$plos, function(z) {
    -  paste0(z$abstract, collapse = " ")
    -})
    -
    -

    Analyze

    - -

    Using the tm package, we can analyze our articles

    -
    library("tm")
    -corp <- VCorpus(VectorSource(abs))
    -# remove stop words, strip whitespace, remove punctuation
    -corp <- tm_map(corp, removeWords, stopwords("english"))
    -corp <- tm_map(corp, stripWhitespace)
    -corp <- tm_map(corp, removePunctuation)
    -# Make a term document matrix
    -tdm <- TermDocumentMatrix(corp)
    -# remove sparse terms
    -tdm <- removeSparseTerms(tdm, sparse = 0.8)
    -# get data
    -rs <- rowSums(as.matrix(tdm))
    -df <- data.frame(word = names(rs), n = unname(rs), stringsAsFactors = FALSE)
    -
    -

    Visualize

    -
    library("ggplot2")
    -ggplot(df, aes(reorder(word, n), n)) +
    -  geom_point() +
    -  coord_flip() +
    -  labs(y = "Count", x = "Word")
    -
    -

(figure: word counts across the climate change article abstracts)

    + Finally, we got `fulltext` up on CRAN - our first commit was [May last year](https://github.com/ropensci/fulltext/commit/2d4f7e270040b2c8914853113073fc4d3134445e). `fulltext` is a package to facilitate text mining. It focuses on open access journals. This package makes it easier to search for articles, download those articles in full text if available, convert pdf format to plain text, and extract text chunks for vizualization/analysis. We are planning to add bits for analysis in future versions. We've been working on this package for a while now. It has a lot of moving parts and package dependencies, so it took a while to get a first useable version. + +The tasks facilitated by `fulltext` in bullet form: + +* Search - search for articles +* Retrieve - get full text +* Convert - convert from format X to Y +* Text - if needed, get text from pdfs/etc. +* Extract - pull out the bits of articles that you want + +I won't be surprised if users uncover a lot of bugs in this package given the huge number of publishers/journals users want to get literature data from, and the surely wide diversity of use cases. But I thought it was important to get out a first version to get feedback on the user interface, and gather use cases. + +We hope that this package can help bring text-mining to the masses - making it easy for anyone to do do, not just text-mining experts. + +If you have any feedback, please do get in touch in the issue tracker for `fulltext` at https://github.com/ropensci/fulltext/issues - If you have use case thoughts, the [rOpenSci discussion forum](https://discuss.ropensci.org/) might be a good place to go. + +Let's kick the tires, shall we? + +## Install + +Will be on CRAN soon, not as of AM PDT on 2015-08-07. + + +```r +install.packages("fulltext") +# if binaries not avail. yet on your favorite CRAN mirror +install.packages("https://cran.rstudio.com/src/contrib/fulltext_0.1.0.tar.gz", repos = NULL, type = "source") +``` + +Or install development version from GitHub + + +```r +devtools::install_github("ropensci/fulltext") +``` + +Load `fulltext` + + +```r +library("fulltext") +``` + +## Search for articles + +Currently, there are hooks for searching for articles from PLOS, BMC, Crossref, Entrez, arXiv, and BioRxiv. We'll add more in the future, but that does cover a lot of articles, especially given inclusion of Crossref (which mints most DOIs) and Entrez (which houses PMC and Pubmed). + +An example: Search for the term _ecology_ in PLOS journals. + + +```r +(res1 <- ft_search(query = 'ecology', from = 'plos')) +#> Query: +#> [ecology] +#> Found: +#> [PLoS: 28589; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] +#> Returned: +#> [PLoS: 10; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] +``` + +Each publisher/search-engine has a slot with metadata and data, saying how many articles were found and how many were returned. 
We can dig into what PLOS gave us: + + +```r +res1$plos +#> Query: [ecology] +#> Records found, returned: [28589, 10] +#> License: [CC-BY] +#> id +#> 1 10.1371/journal.pone.0059813 +#> 2 10.1371/journal.pone.0001248 +#> 3 10.1371/annotation/69333ae7-757a-4651-831c-f28c5eb02120 +#> 4 10.1371/journal.pone.0080763 +#> 5 10.1371/journal.pone.0102437 +#> 6 10.1371/journal.pone.0017342 +#> 7 10.1371/journal.pone.0091497 +#> 8 10.1371/journal.pone.0092931 +#> 9 10.1371/annotation/28ac6052-4f87-4b88-a817-0cd5743e83d6 +#> 10 10.1371/journal.pcbi.1003594 +``` + +For each of the data sources to search on you can pass in additional options (basically, you can use the query parameters in the functions that hit each service). Here, we can modify our search to PLOS by requesting a particular set of fields with the `fl` parameter (PLOS uses a Solr backed search engine, and `fl` is short for `fields` in Solr land): + + +```r +ft_search(query = 'ecology', from = 'plos', plosopts = list( + fl = c('id','author','eissn','journal','counter_total_all','alm_twitterCount'))) +#> Query: +#> [ecology] +#> Found: +#> [PLoS: 28589; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] +#> Returned: +#> [PLoS: 10; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] +``` + +> Note that PLOS is a bit unique in allowing you to request specific parts of articles. Other sources in ft_search() don't let you do that. + +## Get full text + +After you've found the set of articles you want to get full text for, we can use the results from `ft_search()` to grab full text. `ft_get()` accepts a character vector of list of DOIs (or PMC IDs if fetching from Entrez), or the output of `ft_search()`. + + +```r +(out <- ft_get(res1)) +#> [Docs] 8 +#> [Source] R session +#> [IDs] 10.1371/journal.pone.0059813 10.1371/journal.pone.0001248 +#> 10.1371/journal.pone.0080763 10.1371/journal.pone.0102437 +#> 10.1371/journal.pone.0017342 10.1371/journal.pone.0091497 +#> 10.1371/journal.pone.0092931 10.1371/journal.pcbi.1003594 ... +``` + +We got eight articles in full text in the result. We didn't get 10, even though 10 were returned from `ft_search()` because PLOS often returns records for annotations, that is, comments on articles, which we auto-seive out within `ft_get()`. + +Dig in to the PLOS data + + +```r +out$plos +#> $found +#> [1] 8 +#> +#> $dois +#> [1] "10.1371/journal.pone.0059813" "10.1371/journal.pone.0001248" +#> [3] "10.1371/journal.pone.0080763" "10.1371/journal.pone.0102437" +#> [5] "10.1371/journal.pone.0017342" "10.1371/journal.pone.0091497" +#> [7] "10.1371/journal.pone.0092931" "10.1371/journal.pcbi.1003594" +#> +#> $data +#> $data$backend +#> NULL +#> +#> $data$path +#> [1] "session" +#> +#> $data$data +#> 8 full-text articles retrieved +#> Min. Length: 3828 - Max. Length: 104702 +#> DOIs: 10.1371/journal.pone.0059813 10.1371/journal.pone.0001248 +#> 10.1371/journal.pone.0080763 10.1371/journal.pone.0102437 +#> 10.1371/journal.pone.0017342 10.1371/journal.pone.0091497 +#> 10.1371/journal.pone.0092931 10.1371/journal.pcbi.1003594 ... 
+#> +#> NOTE: extract xml strings like output[''] +#> +#> $opts +#> $opts$doi +#> [1] "10.1371/journal.pone.0059813" "10.1371/journal.pone.0001248" +#> [3] "10.1371/journal.pone.0080763" "10.1371/journal.pone.0102437" +#> [5] "10.1371/journal.pone.0017342" "10.1371/journal.pone.0091497" +#> [7] "10.1371/journal.pone.0092931" "10.1371/journal.pcbi.1003594" +#> +#> $opts$callopts +#> list() +``` + +Dig in further to get to one of the articles in XML format + + +```r +library("xml2") +xml2::read_xml(out$plos$data$data$`10.1371/journal.pone.0059813`) +#> {xml_document} +#>
    +#> [1] \n\nPLoS O ... +#> [2] \n \nIntroduction\n

    Ecologists ... +#> [3] \n\n

    Curtis Flather, Mark Burgman, Leon Blaustein, Yaac ... +``` + +Now with the xml, you can dig into whatever you like, e.g., using `xml2` or `rvest`. + +## Extract text from pdfs + +Ideally for text mining you have access to XML or other text based formats. However, sometimes you only have access to PDFs. In this case you want to extract text from PDFs. `fulltext` can help with that. + +You can extract from any pdf from a file path, like: + + +```r +path <- system.file("examples", "example1.pdf", package = "fulltext") +ft_extract(path) +#> /Library/Frameworks/R.framework/Versions/3.2/Resources/library/fulltext/examples/example1.pdf +#> Pages: 18 +#> Title: Suffering and mental health among older people living in nursing homes---a mixed-methods study +#> Producer: pdfTeX-1.40.10 +#> Creation date: 2015-07-17 +``` + +Let's search for articles from arXiv, a preprint service. Here, get pdf from an article with ID `cond-mat/9309029`: + + +```r +res <- ft_get('cond-mat/9309029', from = "arxiv") +res2 <- ft_extract(res) +res2$arxiv$data +#> $backend +#> NULL +#> +#> $path +#> $path$`cond-mat/9309029` +#> [1] "~/.fulltext/cond-mat_9309029.pdf" +#> +#> +#> $data +#> $data[[1]] +#> /Users/sacmac/.fulltext/cond-mat_9309029.pdf +#> Pages: 14 +#> Title: arXiv:cond-mat/9309029v8 26 Jan 1994 +#> Producer: GPL Ghostscript SVN PRE-RELEASE 8.62 +#> Creation date: 2008-02-06 +``` + +And a short snippet of the full text + + +```r +res2$arxiv$data$data[[1]]$data +#> "arXiv:cond-mat/9309029v8 26 Jan 1994, , FERMILAB-PUB-93/15-T March 1993, Revised: +#> January 1994, The Thermodynamics and Economics of Waste, Dallas C. Kennedy, Research +#> Associate, Fermi National Accelerator Laboratory, P.O. Box 500 MS106, Batavia, Illinois +#> 60510 USA, Abstract, The increasingly relevant problem of natural resource use and +#> waste production, disposal, and reuse is examined from several viewpoints: economic, +#> technical, and thermodynamic. Alternative economies are studied, with emphasis on +#> recycling of waste to close the natural resource cycle. The physical nature of human +#> economies and constraints on recycling and energy efficiency are stated in terms +#> ..." +``` + +## Extract text chunks + +We have a few functions to help you pull out certain parts of an article. For example, perhaps you want to get just the authors from your articles, or just the abstracts. + +Here, we'll search for some PLOS articles, then get their full text, then extract various parts of each article with `chunks()`. + + +```r +res <- ft_search(query = "ecology", from = "plos") +(x <- ft_get(res)) +#> [Docs] 8 +#> [Source] R session +#> [IDs] 10.1371/journal.pone.0059813 10.1371/journal.pone.0001248 +#> 10.1371/journal.pone.0080763 10.1371/journal.pone.0102437 +#> 10.1371/journal.pone.0017342 10.1371/journal.pone.0091497 +#> 10.1371/journal.pone.0092931 10.1371/journal.pcbi.1003594 ... 
+``` + +Extract DOIs + + +```r +x %>% chunks("doi") +#> $plos +#> $plos$`10.1371/journal.pone.0059813` +#> $plos$`10.1371/journal.pone.0059813`$doi +#> [1] "10.1371/journal.pone.0059813" +#> +#> +#> $plos$`10.1371/journal.pone.0001248` +#> $plos$`10.1371/journal.pone.0001248`$doi +#> [1] "10.1371/journal.pone.0001248" +#> +#> +#> $plos$`10.1371/journal.pone.0080763` +#> $plos$`10.1371/journal.pone.0080763`$doi +#> [1] "10.1371/journal.pone.0080763" +#> +#> +#> $plos$`10.1371/journal.pone.0102437` +#> $plos$`10.1371/journal.pone.0102437`$doi +#> [1] "10.1371/journal.pone.0102437" +#> +#> +#> $plos$`10.1371/journal.pone.0017342` +#> $plos$`10.1371/journal.pone.0017342`$doi +#> [1] "10.1371/journal.pone.0017342" +#> +#> +#> $plos$`10.1371/journal.pone.0091497` +#> $plos$`10.1371/journal.pone.0091497`$doi +#> [1] "10.1371/journal.pone.0091497" +#> +#> +#> $plos$`10.1371/journal.pone.0092931` +#> $plos$`10.1371/journal.pone.0092931`$doi +#> [1] "10.1371/journal.pone.0092931" +#> +#> +#> $plos$`10.1371/journal.pcbi.1003594` +#> $plos$`10.1371/journal.pcbi.1003594`$doi +#> [1] "10.1371/journal.pcbi.1003594" +``` + +Extract DOIs and categories + + +```r +x %>% chunks(c("doi","categories")) +#> $plos +#> $plos$`10.1371/journal.pone.0059813` +#> $plos$`10.1371/journal.pone.0059813`$doi +#> [1] "10.1371/journal.pone.0059813" +#> +#> $plos$`10.1371/journal.pone.0059813`$categories +#> [1] "Research Article" "Biology" +#> [3] "Ecology" "Community ecology" +#> [5] "Species interactions" "Science policy" +#> [7] "Research assessment" "Research monitoring" +#> [9] "Research funding" "Government funding of science" +#> [11] "Research laboratories" "Science policy and economics" +#> [13] "Science and technology workforce" "Careers in research" +#> [15] "Social and behavioral sciences" "Sociology" +#> [17] "Sociology of knowledge" +#> +#> +#> $plos$`10.1371/journal.pone.0001248` +#> $plos$`10.1371/journal.pone.0001248`$doi +#> [1] "10.1371/journal.pone.0001248" +#> +#> $plos$`10.1371/journal.pone.0001248`$categories +#> [1] "Research Article" "Ecology" +#> [3] "Ecology/Ecosystem Ecology" "Ecology/Evolutionary Ecology" +#> [5] "Ecology/Theoretical Ecology" +#> +#> +#> $plos$`10.1371/journal.pone.0080763` +#> $plos$`10.1371/journal.pone.0080763`$doi +#> [1] "10.1371/journal.pone.0080763" +#> +#> $plos$`10.1371/journal.pone.0080763`$categories +#> [1] "Research Article" "Biology" "Ecology" +#> [4] "Autecology" "Behavioral ecology" "Community ecology" +#> [7] "Evolutionary ecology" "Population ecology" "Evolutionary biology" +#> [10] "Behavioral ecology" "Evolutionary ecology" "Population biology" +#> [13] "Population ecology" +#> +#> +#> $plos$`10.1371/journal.pone.0102437` +#> $plos$`10.1371/journal.pone.0102437`$doi +#> [1] "10.1371/journal.pone.0102437" +#> +#> $plos$`10.1371/journal.pone.0102437`$categories +#> [1] "Research Article" +#> [2] "Biology and life sciences" +#> [3] "Biogeography" +#> [4] "Ecology" +#> [5] "Ecosystems" +#> [6] "Ecosystem engineering" +#> [7] "Ecosystem functioning" +#> [8] "Industrial ecology" +#> [9] "Spatial and landscape ecology" +#> [10] "Urban ecology" +#> [11] "Computer and information sciences" +#> [12] "Geoinformatics" +#> [13] "Spatial analysis" +#> [14] "Earth sciences" +#> [15] "Geography" +#> [16] "Human geography" +#> [17] "Cultural geography" +#> [18] "Social geography" +#> [19] "Ecology and environmental sciences" +#> [20] "Conservation science" +#> [21] "Environmental protection" +#> [22] "Nature-society interactions" +#> +#> +#> 
$plos$`10.1371/journal.pone.0017342` +#> $plos$`10.1371/journal.pone.0017342`$doi +#> [1] "10.1371/journal.pone.0017342" +#> +#> $plos$`10.1371/journal.pone.0017342`$categories +#> [1] "Research Article" "Biology" "Ecology" +#> [4] "Community ecology" "Community assembly" "Community structure" +#> [7] "Niche construction" "Ecological metrics" "Species diversity" +#> [10] "Species richness" "Biodiversity" "Biogeography" +#> [13] "Population ecology" "Mathematics" "Statistics" +#> [16] "Biostatistics" "Statistical theories" "Ecology" +#> [19] "Mathematics" +#> +#> +#> $plos$`10.1371/journal.pone.0091497` +#> $plos$`10.1371/journal.pone.0091497`$doi +#> [1] "10.1371/journal.pone.0091497" +#> +#> $plos$`10.1371/journal.pone.0091497`$categories +#> [1] "Correction" +#> +#> +#> $plos$`10.1371/journal.pone.0092931` +#> $plos$`10.1371/journal.pone.0092931`$doi +#> [1] "10.1371/journal.pone.0092931" +#> +#> $plos$`10.1371/journal.pone.0092931`$categories +#> [1] "Correction" +#> +#> +#> $plos$`10.1371/journal.pcbi.1003594` +#> $plos$`10.1371/journal.pcbi.1003594`$doi +#> [1] "10.1371/journal.pcbi.1003594" +#> +#> $plos$`10.1371/journal.pcbi.1003594`$categories +#> [1] "Research Article" "Biology and life sciences" +#> [3] "Computational biology" "Microbiology" +#> [5] "Theoretical biology" +``` + +`tabularize` attempts to help you put the data that comes out of `chunks()` in to a `data.frame`, that we all know and love. + + +```r +x %>% chunks(c("doi", "history")) %>% tabularize() +#> $plos +#> doi history.received history.accepted +#> 1 10.1371/journal.pone.0059813 2012-09-16 2013-02-19 +#> 2 10.1371/journal.pone.0001248 2007-07-02 2007-11-06 +#> 3 10.1371/journal.pone.0080763 2013-08-15 2013-10-16 +#> 4 10.1371/journal.pone.0102437 2013-11-27 2014-06-19 +#> 5 10.1371/journal.pone.0017342 2010-08-24 2011-01-31 +#> 6 10.1371/journal.pone.0091497 +#> 7 10.1371/journal.pone.0092931 +#> 8 10.1371/journal.pcbi.1003594 2014-01-09 2014-03-14 +``` + + +## Bring it all together + +With the pieces above, let's see what it looks like all in one go. Here, we'll search for articles on _climate change_, then visualize word usage in those articles. + +### Search + + +```r +(out <- ft_search(query = 'climate change', from = 'plos', limit = 100)) +#> Query: +#> [climate change] +#> Found: +#> [PLoS: 11737; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] +#> Returned: +#> [PLoS: 100; BMC: 0; Crossref: 0; Entrez: 0; arxiv: 0; biorxiv: 0] +``` + +### Get full text + + +```r +(texts <- ft_get(out)) +#> [Docs] 99 +#> [Source] R session +#> [IDs] 10.1371/journal.pone.0054839 10.1371/journal.pone.0045683 +#> 10.1371/journal.pone.0050182 10.1371/journal.pone.0118489 +#> 10.1371/journal.pone.0053646 10.1371/journal.pone.0015103 +#> 10.1371/journal.pone.0008320 10.1371/journal.pmed.1001227 +#> 10.1371/journal.pmed.1001374 10.1371/journal.pone.0097480 ... +``` + +Because PLOS returns XML, we don't need to do a PDF extraction step. However, if we got full text from arXiv or bioRxiv, we'd need to extract from PDFs first. 
+ +### Pull out chunks + + +```r +abs <- texts %>% chunks("abstract") +``` + +Let's pull out just the text + + +```r +abs <- lapply(abs$plos, function(z) { + paste0(z$abstract, collapse = " ") +}) +``` + +### Analyze + +Using the `tm` package, we can analyze our articles + + +```r +library("tm") +corp <- VCorpus(VectorSource(abs)) +# remove stop words, strip whitespace, remove punctuation +corp <- tm_map(corp, removeWords, stopwords("english")) +corp <- tm_map(corp, stripWhitespace) +corp <- tm_map(corp, removePunctuation) +# Make a term document matrix +tdm <- TermDocumentMatrix(corp) +# remove sparse terms +tdm <- removeSparseTerms(tdm, sparse = 0.8) +# get data +rs <- rowSums(as.matrix(tdm)) +df <- data.frame(word = names(rs), n = unname(rs), stringsAsFactors = FALSE) +``` + +### Visualize + + +```r +library("ggplot2") +ggplot(df, aes(reorder(word, n), n)) + + geom_point() + + coord_flip() + + labs(y = "Count", x = "Word") +``` + +![plot of chunk unnamed-chunk-23](/public/img/2015-08-07-full-text/unnamed-chunk-23-1.png)

    @@ -500,41 +774,49 @@

    -

    NOAA provides a lot of weather data, across many different websites under different project names. The R package rnoaa accesses many of these, including:

    + NOAA provides a lot of weather data, across many different websites under different project names. The R package `rnoaa` accesses many of these, including: + +* NOAA NCDC climate data, using the [NCDC API version 2](http://www.ncdc.noaa.gov/cdo-web/webservices/v2) +* [GHCND FTP data](ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/) +* [ISD FTP data](ftp://ftp.ncdc.noaa.gov/pub/data/noaa/) +* Severe weather data docs are at [http://www.ncdc.noaa.gov/swdiws/](http://www.ncdc.noaa.gov/swdiws/) +* [Sea ice data](ftp://sidads.colorado.edu/DATASETS/NOAA/G02135/shapefiles) +* [NOAA buoy data](http://www.ndbc.noaa.gov/) +* Tornadoes! Data from the [NOAA Storm Prediction Center](http://www.spc.noaa.gov/gis/svrgis/) +* HOMR - Historical Observing Metadata Repository - from [NOAA NCDC](http://www.ncdc.noaa.gov/homr/api) +* Storm data - from the [International Best Track Archive for Climate Stewardship (IBTrACS)](http://www.ncdc.noaa.gov/ibtracs/index.php?name=wmo-data) + +`rnoaa` used to provide access to [ERDDAP servers](http://upwell.pfeg.noaa.gov/erddap/index.html), but a separate package [rerddap](https://github.com/ropensci/rerddap) focuses on just those data sources. + +We focus on getting you the data, so there's very little in `rnoaa` for visualizing, statistics, etc. + +## Installation + +The newest version should be on CRAN in the next few days. In the meantime, let's install from GitHub + + +```r +devtools::install_github("ropensci/rnoaa") +``` + - +```r +library("rnoaa") +``` -
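+Once that CRAN version is up, installing the released package is the usual one-liner (standard R, nothing `rnoaa`-specific):
+
+```r
+install.packages("rnoaa")
+```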

    rnoaa used to provide access to ERDDAP servers, but a separate package rerddap focuses on just those data sources.

+There's an example below using the `lawn`, `sp`, and `dplyr` packages. If you want to try those, install them like this: -

    We focus on getting you the data, so there's very little in rnoaa for visualizing, statistics, etc.

    -

    Installation

    +```r +install.packages(c("lawn", "dplyr", "sp")) +``` -

    The newest version should be on CRAN in the next few days. In the meantime, let's install from GitHub

    -
    devtools::install_github("ropensci/rnoaa")
    -
    library("rnoaa")
    -
    -

    There's an example using the lawn, sp, and dplyr packages. If you want to try those, install like

    -
    install.packages(c("lawn", "dplyr", "sp"))
    -
    -

    NCDC

    +## NCDC - +* NCDC = National Climatic Data Center +* Data comes from a RESTful API described at [http://www.ncdc.noaa.gov/cdo-web/webservices/v2](http://www.ncdc.noaa.gov/cdo-web/webservices/v2) -

    This web service requires an API key - get one at http://www.ncdc.noaa.gov/cdo-web/token if you don't already have one. NCDC provides access to many different datasets:

    +This web service requires an API key - get one at [http://www.ncdc.noaa.gov/cdo-web/token](http://www.ncdc.noaa.gov/cdo-web/token) if you don't already have one. NCDC provides access to many different datasets: @@ -615,704 +897,444 @@
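+One way to make that key available to the `ncdc*()` functions - a sketch, assuming `rnoaa` picks the key up from the `noaakey` option (check the package docs for the current mechanism) - is to set it once per session, or in your `.Rprofile`:
+
+```r
+# hypothetical setup: store the NCDC token where rnoaa can find it
+options(noaakey = "your-api-key-here")
+```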

    NCDC

    -

    The main function to get data from NCDC is ncdc(). datasetid, startdate, and enddate are required parameters. A quick example, here getting data from the GHCND dataset, from a particular station, and from Oct 1st 2013 to Dec 12th 2013:

    -
    ncdc(datasetid = 'GHCND', stationid = 'GHCND:USW00014895', startdate = '2013-10-01',
    -   enddate = '2013-12-01')
    -#> $meta
    -#> $meta$totalCount
    -#> [1] 697
    -#> 
    -#> $meta$pageCount
    -#> [1] 25
    -#> 
    -#> $meta$offset
    -#> [1] 1
    -#> 
    -#> 
    -#> $data
    -#> Source: local data frame [25 x 8]
    -#> 
    -#>                   date datatype           station value fl_m fl_q fl_so
    -#> 1  2013-10-01T00:00:00     AWND GHCND:USW00014895    29               W
    -#> 2  2013-10-01T00:00:00     PRCP GHCND:USW00014895     0               W
    -#> 3  2013-10-01T00:00:00     SNOW GHCND:USW00014895     0               W
    -#> 4  2013-10-01T00:00:00     SNWD GHCND:USW00014895     0               W
    -#> 5  2013-10-01T00:00:00     TAVG GHCND:USW00014895   179    H          S
    -#> 6  2013-10-01T00:00:00     TMAX GHCND:USW00014895   250               W
    -#> 7  2013-10-01T00:00:00     TMIN GHCND:USW00014895   133               W
    -#> 8  2013-10-01T00:00:00     WDF2 GHCND:USW00014895   210               W
    -#> 9  2013-10-01T00:00:00     WDF5 GHCND:USW00014895   230               W
    -#> 10 2013-10-01T00:00:00     WSF2 GHCND:USW00014895    76               W
    -#> ..                 ...      ...               ...   ...  ...  ...   ...
    -#> Variables not shown: fl_t (chr)
    -#> 
    -#> attr(,"class")
    -#> [1] "ncdc_data"
    -
    -

    You probably won't know what station you want data from off hand though, so you can first search for stations, in this example using a bounding box that defines a rectangular area near Seattle

    -
    library("lawn")
    -lawn_bbox_polygon(c(-122.2047, 47.5204, -122.1065, 47.6139)) %>% view
    -
    -

    lawnplot

    - -

    We'll search within that bounding box for weather stations.

    -
    ncdc_stations(extent = c(47.5204, -122.2047, 47.6139, -122.1065))
    -#> $meta
    -#> $meta$totalCount
    -#> [1] 9
    -#> 
    -#> $meta$pageCount
    -#> [1] 25
    -#> 
    -#> $meta$offset
    -#> [1] 1
    -#> 
    -#> 
    -#> $data
    -#> Source: local data frame [9 x 9]
    -#> 
    -#>   elevation    mindate    maxdate latitude                         name
    -#> 1     199.6 2008-06-01 2015-06-29  47.5503      EASTGATE 1.7 SSW, WA US
    -#> 2     240.8 2010-05-01 2015-07-05  47.5604       EASTGATE 1.1 SW, WA US
    -#> 3      85.6 2008-07-01 2015-07-05  47.5916        BELLEVUE 0.8 S, WA US
    -#> 4     104.2 2008-06-01 2015-07-05  47.5211 NEWPORT HILLS 1.9 SSE, WA US
    -#> 5      58.5 2008-08-01 2009-04-12  47.6138      BELLEVUE 2.3 ENE, WA US
    -#> 6     199.9 2008-06-01 2009-11-22  47.5465   NEWPORT HILLS 1.4 E, WA US
    -#> 7      27.1 2008-07-01 2015-07-05  47.6046        BELLEVUE 1.8 W, WA US
    -#> 8     159.4 2008-11-01 2015-07-05  47.5694      BELLEVUE 2.3 SSE, WA US
    -#> 9      82.3 2008-12-01 2010-09-17  47.6095       BELLEVUE 0.6 NE, WA US
    -#> Variables not shown: datacoverage (dbl), id (chr), elevationUnit (chr),
    -#>   longitude (dbl)
    -#> 
    -#> attr(,"class")
    -#> [1] "ncdc_stations"
    -
    -

    And there are 9 found. We could then use their station ids (e.g., GHCND:US1WAKG0024) to search for data using ncdc(), or search for what kind of data that station has with ncdc_datasets(), or other functions.

    - -

    GHCND

    - -
      -
    • GHCND = Global Historical Climatology Network Daily (Data)
    • -
    • Data comes from an FTP server
    • -
    -
    library("dplyr")
    -dat <- ghcnd(stationid = "AGE00147704")
    -dat$data %>%
    -  filter(element == "PRCP", year == 1909)
    -#>            id year month element VALUE1 MFLAG1 QFLAG1 SFLAG1 VALUE2 MFLAG2
    -#> 1 AGE00147704 1909    11    PRCP  -9999     NA                -9999     NA
    -#> 2 AGE00147704 1909    12    PRCP     23     NA             E      0     NA
    -#>   QFLAG2 SFLAG2 VALUE3 MFLAG3 QFLAG3 SFLAG3 VALUE4 MFLAG4 QFLAG4 SFLAG4
    -#> 1                -9999     NA                -9999     NA              
    -#> 2             E      0     NA             E      0     NA             E
    -#>   VALUE5 MFLAG5 QFLAG5 SFLAG5 VALUE6 MFLAG6 QFLAG6 SFLAG6 VALUE7 MFLAG7
    -#> 1  -9999     NA                -9999     NA                -9999     NA
    -#> 2      0     NA             E      0     NA             E      0     NA
    -#>   QFLAG7 SFLAG7 VALUE8 MFLAG8 QFLAG8 SFLAG8 VALUE9 MFLAG9 QFLAG9 SFLAG9
    -#> 1     NA         -9999     NA                -9999     NA              
    -#> 2     NA      E    250     NA             E     75     NA             E
    -#>   VALUE10 MFLAG10 QFLAG10 SFLAG10 VALUE11 MFLAG11 QFLAG11 SFLAG11 VALUE12
    -#> 1   -9999      NA                   -9999      NA                   -9999
    -#> 2     131      NA               E       0      NA               E       0
    -#>   MFLAG12 QFLAG12 SFLAG12 VALUE13 MFLAG13 QFLAG13 SFLAG13 VALUE14 MFLAG14
    -#> 1      NA                   -9999      NA                   -9999      NA
    -#> 2      NA               E       0      NA               E       0      NA
    -#>   QFLAG14 SFLAG14 VALUE15 MFLAG15 QFLAG15 SFLAG15 VALUE16 MFLAG16 QFLAG16
    -#> 1                   -9999      NA                   -9999      NA        
    -#> 2               E       0      NA               E       0      NA        
    -#>   SFLAG16 VALUE17 MFLAG17 QFLAG17 SFLAG17 VALUE18 MFLAG18 QFLAG18 SFLAG18
    -#> 1           -9999      NA                   -9999      NA                
    -#> 2       E       0      NA               E       0      NA               E
    -#>   VALUE19 MFLAG19 QFLAG19 SFLAG19 VALUE20 MFLAG20 QFLAG20 SFLAG20 VALUE21
    -#> 1   -9999      NA      NA           -9999      NA      NA           -9999
    -#> 2       0      NA      NA       E       0      NA      NA       E       0
    -#>   MFLAG21 QFLAG21 SFLAG21 VALUE22 MFLAG22 QFLAG22 SFLAG22 VALUE23 MFLAG23
    -#> 1      NA                   -9999      NA                      22      NA
    -#> 2      NA               E       0      NA               E       0      NA
    -#>   QFLAG23 SFLAG23 VALUE24 MFLAG24 QFLAG24 SFLAG24 VALUE25 MFLAG25 QFLAG25
    -#> 1      NA       E       9      NA      NA       E       5      NA      NA
    -#> 2      NA       E       0      NA      NA       E       0      NA      NA
    -#>   SFLAG25 VALUE26 MFLAG26 QFLAG26 SFLAG26 VALUE27 MFLAG27 QFLAG27 SFLAG27
    -#> 1       E       0      NA               E      86      NA      NA       E
    -#> 2       E       0      NA               E       0      NA      NA       E
    -#>   VALUE28 MFLAG28 QFLAG28 SFLAG28 VALUE29 MFLAG29 QFLAG29 SFLAG29 VALUE30
    -#> 1       0      NA      NA       E      28      NA      NA       E       0
    -#> 2       0      NA      NA       E       0      NA      NA       E       0
    -#>   MFLAG30 QFLAG30 SFLAG30 VALUE31 MFLAG31 QFLAG31 SFLAG31
    -#> 1      NA               E   -9999      NA      NA        
    -#> 2      NA               E      57      NA      NA       E
    -
    -

    You can also get to datasets by searching by station id, date min, date max, and variable. E.g.

    -
    ghcnd_search("AGE00147704", var = "PRCP")
    -#> $prcp
    -#> Source: local data frame [9,803 x 6]
    -#> 
    -#>             id  prcp       date mflag qflag sflag
    -#> 1  AGE00147704 -9999 1909-11-01    NA            
    -#> 2  AGE00147704    23 1909-12-01    NA           E
    -#> 3  AGE00147704    81 1910-01-01    NA           E
    -#> 4  AGE00147704     0 1910-02-01    NA           E
    -#> 5  AGE00147704    18 1910-03-01    NA           E
    -#> 6  AGE00147704     0 1910-04-01    NA           E
    -#> 7  AGE00147704   223 1910-05-01    NA           E
    -#> 8  AGE00147704     0 1910-06-01    NA           E
    -#> 9  AGE00147704     0 1910-07-01    NA           E
    -#> 10 AGE00147704     0 1910-08-01    NA           E
    -#> ..         ...   ...        ...   ...   ...   ...
    -
    -

    ISD

    - -
      -
    • ISD = Integrated Surface Database
    • -
    • Data comes from an FTP server
    • -
    - -

    You'll likely first want to run isd_stations() to get list of stations

    -
    stations <- isd_stations()
    -head(stations)
    -#>   usaf  wban station_name ctry state icao lat lon elev_m    begin      end
    -#> 1 7005 99999   CWOS 07005                  NA  NA     NA 20120127 20120127
    -#> 2 7011 99999   CWOS 07011                  NA  NA     NA 20111025 20121129
    -#> 3 7018 99999   WXPOD 7018                   0   0   7018 20110309 20130730
    -#> 4 7025 99999   CWOS 07025                  NA  NA     NA 20120127 20120127
    -#> 5 7026 99999   WXPOD 7026   AF              0   0   7026 20120713 20141120
    -#> 6 7034 99999   CWOS 07034                  NA  NA     NA 20121024 20121106
    -
    -

    Then get data from particular stations, like

    -
    (res <- isd(usaf = "011490", wban = "99999", year = 1986))
    -#> <ISD Data>
    -#> Size: 1328 X 85
    -#> 
    -#>    total_chars usaf_station wban_station     date time date_flag latitude
    -#> 1           50        11490        99999 19860101    0         4    66267
    -#> 2          123        11490        99999 19860101  600         4    66267
    -#> 3           50        11490        99999 19860101 1200         4    66267
    -#> 4           94        11490        99999 19860101 1800         4    66267
    -#> 5           50        11490        99999 19860102    0         4    66267
    -#> 6          123        11490        99999 19860102  600         4    66267
    -#> 7           50        11490        99999 19860102 1200         4    66267
    -#> 8           94        11490        99999 19860102 1800         4    66267
    -#> 9           50        11490        99999 19860103    0         4    66267
    -#> 10         123        11490        99999 19860103  600         4    66267
    -#> ..         ...          ...          ...      ...  ...       ...      ...
    -#> Variables not shown: longitude (int), type_code (chr), elevation (int),
    -#>      call_letter (int), quality (chr), wind_direction (int),
    -#>      wind_direction_quality (int), wind_code (chr), wind_speed (int),
    -#>      wind_speed_quality (int), ceiling_height (int),
    -#>      ceiling_height_quality (int), ceiling_height_determination (chr),
    -#>      ceiling_height_cavok (chr), visibility_distance (int),
    -#>      visibility_distance_quality (int), visibility_code (chr),
    -#>      visibility_code_quality (int), temperature (int), temperature_quality
    -#>      (int), temperature_dewpoint (int), temperature_dewpoint_quality
    -#>      (int), air_pressure (int), air_pressure_quality (int),
    -#>      AG1.precipitation (chr), AG1.discrepancy (int), AG1.est_water_depth
    -#>      (int), GF1.sky_condition (chr), GF1.coverage (int),
    -#>      GF1.opaque_coverage (int), GF1.coverage_quality (int),
    -#>      GF1.lowest_cover (int), GF1.lowest_cover_quality (int),
    -#>      GF1.low_cloud_genus (int), GF1.low_cloud_genus_quality (int),
    -#>      GF1.lowest_cloud_base_height (int),
    -#>      GF1.lowest_cloud_base_height_quality (int), GF1.mid_cloud_genus
    -#>      (int), GF1.mid_cloud_genus_quality (int), GF1.high_cloud_genus (int),
    -#>      GF1.high_cloud_genus_quality (int), MD1.atmospheric_change (chr),
    -#>      MD1.tendency (int), MD1.tendency_quality (int), MD1.three_hr (int),
    -#>      MD1.three_hr_quality (int), MD1.twentyfour_hr (int),
    -#>      MD1.twentyfour_hr_quality (int), REM.remarks (chr), REM.identifier
    -#>      (chr), REM.length_quantity (int), REM.comment (chr), KA1.extreme_temp
    -#>      (chr), KA1.period_quantity (int), KA1.max_min (chr), KA1.temp (int),
    -#>      KA1.temp_quality (int), AY1.manual_occurrence (chr),
    -#>      AY1.condition_code (int), AY1.condition_quality (int), AY1.period
    -#>      (int), AY1.period_quality (int), AY2.manual_occurrence (chr),
    -#>      AY2.condition_code (int), AY2.condition_quality (int), AY2.period
    -#>      (int), AY2.period_quality (int), MW1.first_weather_reported (chr),
    -#>      MW1.condition (int), MW1.condition_quality (int),
    -#>      EQD.observation_identifier (chr), EQD.observation_text (int),
    -#>      EQD.reason_code (int), EQD.parameter (chr),
    -#>      EQD.observation_identifier.1 (chr), EQD.observation_text.1 (int),
    -#>      EQD.reason_code.1 (int), EQD.parameter.1 (chr)
    -
    -

    Severe weather

    - -
      -
    • SWDI = Severe Weather Data Inventory
    • -
    • From the SWDI site
    • -
    - -
    -

    The Severe Weather Data Inventory (SWDI) is an integrated database of severe weather records for the United States. The records in SWDI come from a variety of sources in the NCDC archive.

    -
    - -

    The swdi() function allows you to get data in xml, csv, shp, or kmz format. You can get data from many different datasets:

    - -
      -
    • nx3tvs NEXRAD Level-3 Tornado Vortex Signatures (point)
    • -
    • nx3meso NEXRAD Level-3 Mesocyclone Signatures (point)
    • -
    • nx3hail NEXRAD Level-3 Hail Signatures (point)
    • -
    • nx3structure NEXRAD Level-3 Storm Cell Structure Information (point)
    • -
    • plsr Preliminary Local Storm Reports (point)
    • -
    • warn Severe Thunderstorm, Tornado, Flash Flood and Special Marine warnings (polygon)
    • -
    • nldn Lightning strikes from Vaisala (.gov and .mil ONLY) (point)
    • -
    - -

    An example: Get all plsr within the bounding box (-91,30,-90,31)

    -
    swdi(dataset = 'plsr', startdate = '20060505', enddate = '20060510',
    -bbox = c(-91, 30, -90, 31))
    -#> $meta
    -#> $meta$totalCount
    -#> numeric(0)
    -#> 
    -#> $meta$totalTimeInSeconds
    -#> [1] 0
    -#> 
    -#> 
    -#> $data
    -#> Source: local data frame [5 x 8]
    -#> 
    -#>                  ztime     id        event magnitude            city
    -#> 1 2006-05-09T02:20:00Z 427540         HAIL         1    5 E KENTWOOD
    -#> 2 2006-05-09T02:40:00Z 427536         HAIL         1    MOUNT HERMAN
    -#> 3 2006-05-09T02:40:00Z 427537 TSTM WND DMG     -9999    MOUNT HERMAN
    -#> 4 2006-05-09T03:00:00Z 427199         HAIL         0     FRANKLINTON
    -#> 5 2006-05-09T03:17:00Z 427200      TORNADO     -9999 5 S FRANKLINTON
    -#> Variables not shown: county (chr), state (chr), source (chr)
    -#> 
    -#> $shape
    -#>                  shape
    -#> 1 POINT (-90.43 30.93)
    -#> 2  POINT (-90.3 30.96)
    -#> 3  POINT (-90.3 30.96)
    -#> 4 POINT (-90.14 30.85)
    -#> 5 POINT (-90.14 30.78)
    -#> 
    -#> attr(,"class")
    -#> [1] "swdi"
    -
    -

    Sea ice

    - -

    The seaice() function simply grabs shape files that describe sea ice cover at the Northa and South poles, and can be useful for examining change through time in sea ice cover, among other things.

    - -

    An example: Plot sea ice cover for April 1990 for the North pole.

    -
    urls <- seaiceeurls(mo = 'Apr', pole = 'N', yr = 1990)
    -out <- seaice(urls)
    -
    -library('ggplot2')
    -ggplot(out, aes(long, lat, group = group)) +
    -   geom_polygon(fill = "steelblue") +
    -   theme_ice()
    -
    -

    plot of chunk unnamed-chunk-13

    - -

    Buoys

    - -
      -
    • Get NOAA buoy data from the National Buoy Data Center
    • -
    - -

    Using buoy data requires the ncdf package. Make sure you have that installed, like install.packages("ncdf"). buoy() and buoys() will fail if you don't have ncdf installed.

    - -

    buoys() - Get available buoys given a dataset name

    -
    head(buoys(dataset = 'cwind'))
    -#>      id
    -#> 1 41001
    -#> 2 41002
    -#> 3 41004
    -#> 4 41006
    -#> 5 41008
    -#> 6 41009
    -#>                                                                       url
    -#> 1 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41001/catalog.html
    -#> 2 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41002/catalog.html
    -#> 3 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41004/catalog.html
    -#> 4 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41006/catalog.html
    -#> 5 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41008/catalog.html
    -#> 6 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41009/catalog.html
    -
    -

    buoy() - Get data for a buoy - if no year or datatype specified, we get the first file

    -
    buoy(dataset = 'cwind', buoyid = 46085)
    -#> Dimensions (rows/cols): [33486 X 5] 
    -#> 2 variables: [wind_dir, wind_spd] 
    -#> 
    -#>                    time latitude longitude wind_dir wind_spd
    -#> 1  2007-05-05T02:00:00Z   55.855  -142.559      331      2.8
    -#> 2  2007-05-05T02:10:00Z   55.855  -142.559      328      2.6
    -#> 3  2007-05-05T02:20:00Z   55.855  -142.559      329      2.2
    -#> 4  2007-05-05T02:30:00Z   55.855  -142.559      356      2.1
    -#> 5  2007-05-05T02:40:00Z   55.855  -142.559      360      1.5
    -#> 6  2007-05-05T02:50:00Z   55.855  -142.559       10      1.9
    -#> 7  2007-05-05T03:00:00Z   55.855  -142.559       10      2.2
    -#> 8  2007-05-05T03:10:00Z   55.855  -142.559       14      2.2
    -#> 9  2007-05-05T03:20:00Z   55.855  -142.559       16      2.1
    -#> 10 2007-05-05T03:30:00Z   55.855  -142.559       22      1.6
    -#> ..                  ...      ...       ...      ...      ...
    -
    -

    Tornadoes

    - -

    The function tornadoes() gets tornado data from http://www.spc.noaa.gov/gis/svrgis/.

    -
    shp <- tornadoes()
    -library('sp')
    -plot(shp)
    -
    -

    tornadoes

    - -

    Historical Observing Metadata Repository

    - - - -

    homr_definitions() gets you definitions and metadata for datasets

    -
    head(homr_definitions())
    -#> Source: local data frame [6 x 7]
    -#> 
    -#>   defType  abbr                fullName    displayName
    -#> 1     ids GHCND        GHCND IDENTIFIER       GHCND ID
    -#> 2     ids  COOP             COOP NUMBER        COOP ID
    -#> 3     ids  WBAN             WBAN NUMBER        WBAN ID
    -#> 4     ids   FAA FAA LOCATION IDENTIFIER         FAA ID
    -#> 5     ids  ICAO                 ICAO ID        ICAO ID
    -#> 6     ids TRANS          TRANSMITTAL ID Transmittal ID
    -#> Variables not shown: description (chr), cssaName (chr), ghcndName (chr)
    -
    -

    homr() gets you metadata for stations given query parameters. In this example, search for data for the state of Delaware

    -
    res <- homr(state = 'DE')
    -names(res) # the stations
    -#>  [1] "10001871" "10100162" "10100164" "10100166" "20004155" "20004158"
    -#>  [7] "20004160" "20004162" "20004163" "20004168" "20004171" "20004176"
    -#> [13] "20004178" "20004179" "20004180" "20004182" "20004184" "20004185"
    -#> [19] "30001831" "30017384" "30020917" "30021161" "30021998" "30022674"
    -#> [25] "30026770" "30027455" "30032423" "30032685" "30034222" "30039554"
    -#> [31] "30043742" "30046662" "30046814" "30051475" "30057217" "30063570"
    -#> [37] "30064900" "30065901" "30067636" "30069663" "30075067" "30077378"
    -#> [43] "30077857" "30077923" "30077988" "30079088" "30079240" "30082430"
    -#> [49] "30084216" "30084262" "30084537" "30084796" "30094582" "30094639"
    -#> [55] "30094664" "30094670" "30094683" "30094730" "30094806" "30094830"
    -#> [61] "30094917" "30094931" "30094936" "30094991"
    -
    -

    You can index to each one to get more data

    - -

    Storms

    - -
      -
    • Data from: International Best Track Archive for Climate Stewardship (IBTrACS)
    • -
    • Data comes from an FTP server
    • -
    - -

    Flat files (csv's) are served up as well as shp files. In this example, plot storm data for the year 1940

    -
    (res3 <- storm_shp(year = 1940))
    -#> <NOAA Storm Shp Files>
    -#> Path: ~/.rnoaa/storms/year/Year.1940.ibtracs_all_points.v03r06.shp
    -#> Basin: <NA>
    -#> Storm: <NA>
    -#> Year: 1940
    -#> Type: points
    -res3shp <- storm_shp_read(res3)
    -sp::plot(res3shp)
    -
    -

    plot of chunk unnamed-chunk-19

    - -
    - -
    -

    - - rerddap - General purpose R client for ERDDAP servers - -

    - - - -

    ERDDAP is a data server that gives you a simple, consistent way to download subsets of gridded and tabular scientific datasets in common file formats and make graphs and maps. Besides it’s own RESTful interface, much of which is designed based on OPeNDAP, ERDDAP can act as an OPeNDAP server and as a WMS server for gridded data.

    - -

    ERDDAP is a powerful tool - in a world of heterogeneous data, it's often hard to combine data and serve it through the same interface, with tools for querying/filtering/subsetting the data. That is exactly what ERDDAP does. Heterogeneous data sets often have some similarities, such as latitude/longitude data and usually a time component, but other variables vary widely.

    - -

    NetCDF

    - -

    rerddap supports NetCDF format, and is the default when using the griddap() function. We use ncdf by default, but you can choose to use ncdf4 instead.

    - -

    Caching

    - -

    Data files downloaded are cached in a single hidden directory ~/.rerddap on your machine. It's hidden so that you don't accidentally delete the data, but you can still easily delete the data if you like, open files, move them around, etc.

    - -

    When you use griddap() or tabledap() functions, we construct a MD5 hash from the base URL, and any query parameters - this way each query is separately cached. Once we have the hash, we look in ~/.rerddap for a matching hash. If there's a match we use that file on disk - if no match, we make a http request for the data to the ERDDAP server you specify.

    - -

    ERDDAP servers

    - -

    You can get a data.frame of ERDDAP servers using the function servers(). Most I think serve some kind of NOAA data, but there are a few that aren't NOAA data. Here are a few:

    -
    head(servers())
    -#>                                                                                            name
    -#> 1                                                         Marine Domain Awareness (MDA) - Italy
    -#> 2                                                                    Marine Institute - Ireland
    -#> 3                                                      CoastWatch Caribbean/Gulf of Mexico Node
    -#> 4                                                                    CoastWatch West Coast Node
    -#> 5                    NOAA IOOS CeNCOOS (Central and Northern California Ocean Observing System)
    -#> 6 NOAA IOOS NERACOOS (Northeastern Regional Association of Coastal and Ocean Observing Systems)
    -#>                                        url
    -#> 1 https://bluehub.jrc.ec.europa.eu/erddap/
    -#> 2          http://erddap.marine.ie/erddap/
    -#> 3      http://cwcgom.aoml.noaa.gov/erddap/
    -#> 4  http://coastwatch.pfeg.noaa.gov/erddap/
    -#> 5    http://erddap.axiomalaska.com/erddap/
    -#> 6          http://www.neracoos.org/erddap/
    -
    -

    Install

    - -

    From CRAN

    -
    install.packages("rerddap")
    -
    -

    Or development version from GitHub

    -
    devtools::install_github("ropensci/rerddap")
    -
    library('rerddap')
    -
    -

    Search

    - -

    First, you likely want to search for data, specifying whether to search for either griddadp or tabledap datasets. The default is griddap.

    -
    ed_search(query = 'size', which = "table")
    -#> 11 results, showing first 20 
    -#>                                                                                         title
    -#> 1                                                                          CalCOFI Fish Sizes
    -#> 2                                                                        CalCOFI Larvae Sizes
    -#> 3                Channel Islands, Kelp Forest Monitoring, Size and Frequency, Natural Habitat
    -#> 4                                                         CalCOFI Larvae Counts Positive Tows
    -#> 5                                                                                CalCOFI Tows
    -#> 7                                                  OBIS - ARGOS Satellite Tracking of Animals
    -#> 8                                                     GLOBEC NEP MOCNESS Plankton (MOC1) Data
    -#> 9                                                 GLOBEC NEP Vertical Plankton Tow (VPT) Data
    -#> 10                            NWFSC Observer Fixed Gear Data, off West Coast of US, 2002-2006
    -#> 11                                 NWFSC Observer Trawl Data, off West Coast of US, 2002-2006
    -#> 12 AN EXPERIMENTAL DATASET: Underway Sea Surface Temperature and Salinity Aboard the Oleander
    -#>             dataset_id
    -#> 1     erdCalCOFIfshsiz
    -#> 2     erdCalCOFIlrvsiz
    -#> 3       erdCinpKfmSFNH
    -#> 4  erdCalCOFIlrvcntpos
    -#> 5       erdCalCOFItows
    -#> 7            aadcArgos
    -#> 8        erdGlobecMoc1
    -#> 9         erdGlobecVpt
    -#> 10  nwioosObsFixed2002
    -#> 11  nwioosObsTrawl2002
    -#> 12            nodcPJJU
    -
    ed_search(query = 'size', which = "grid")
    -#> 6 results, showing first 20 
    -#>                                                                                                   title
    -#> 6                                                       NOAA Global Coral Bleaching Monitoring Products
    -#> 13        USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][eta_rho][xi_rho]
    -#> 14            USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][eta_u][xi_u]
    -#> 15            USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][eta_v][xi_v]
    -#> 16 USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][s_rho][eta_rho][xi_rho]
    -#> 17  USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][Nbed][eta_rho][xi_rho]
    -#>             dataset_id
    -#> 6             NOAA_DHW
    -#> 13 whoi_ed12_89ce_9592
    -#> 14 whoi_61c3_0b5d_cd61
    -#> 15 whoi_62d0_9d64_c8ff
    -#> 16 whoi_7dd7_db97_4bbe
    -#> 17 whoi_a4fb_2c9c_16a7
    -
    -

    This gives back dataset titles and identifiers - with which you should be able to get a sense for which dataset you may want to fetch.

    - -

    Information

    - -

    After searching you can get more information on a single dataset

    -
    info('whoi_62d0_9d64_c8ff')
    -#> <ERDDAP info> whoi_62d0_9d64_c8ff 
    -#>  Dimensions (range):  
    -#>      time: (2012-06-25T01:00:00Z, 2015-06-24T00:00:00Z) 
    -#>      eta_v: (0, 334) 
    -#>      xi_v: (0, 895) 
    -#>  Variables:  
    -#>      bedload_Vsand_01: 
    -#>          Units: kilogram meter-1 s-1 
    -#>      bedload_Vsand_02: 
    -#>          Units: kilogram meter-1 s-1 
    -...
    -
    -

    Which is a simple S3 list but prints out pretty, so it's easy to quickly scan the printed output and see what you need to see to proceed. That is, in the next step you want to get the dataset, and you'll want to specify your search using some combination of values for latitude, longitude, and time.

    - -

    griddap (gridded) data

    - -

    First, get information on a dataset to see time range, lat/long range, and variables.

    -
    (out <- info('noaa_esrl_027d_0fb5_5d38'))
    -#> <ERDDAP info> noaa_esrl_027d_0fb5_5d38 
    -#>  Dimensions (range):  
    -#>      time: (1850-01-01T00:00:00Z, 2014-05-01T00:00:00Z) 
    -#>      latitude: (87.5, -87.5) 
    -#>      longitude: (-177.5, 177.5) 
    -#>  Variables:  
    -#>      air: 
    -#>          Range: -20.9, 19.5 
    -#>          Units: degC
    -
    -

    Then query for gridded data using the griddap() function

    -
    (res <- griddap(out,
    -  time = c('2012-01-01', '2012-01-30'),
    -  latitude = c(21, 10),
    -  longitude = c(-80, -70)
    -))
    -#> <ERDDAP griddap> noaa_esrl_027d_0fb5_5d38
    -#>    Path: [~/.rerddap/648ed11e8b911b65e39eb63c8df339df.nc]
    -#>    Last updated: [2015-05-09 08:31:10]
    -#>    File size:    [0 mb]
    -#>    Dimensions (dims/vars):   [3 X 1]
    -#>    Dim names: time, latitude, longitude
    -#>    Variable names: CRUTEM3: Surface Air Temperature Monthly Anomaly
    -#>    data.frame (rows/columns):   [18 X 4]
    -#>                    time latitude longitude  air
    -#> 1  2012-01-01T00:00:00Z     22.5     -77.5   NA
    -#> 2  2012-01-01T00:00:00Z     22.5     -77.5   NA
    -#> 3  2012-01-01T00:00:00Z     22.5     -77.5   NA
    -#> 4  2012-01-01T00:00:00Z     22.5     -77.5 -0.1
    -#> 5  2012-01-01T00:00:00Z     22.5     -77.5   NA
    -#> 6  2012-01-01T00:00:00Z     22.5     -77.5 -0.2
    -#> 7  2012-01-01T00:00:00Z     17.5     -72.5  0.2
    -#> 8  2012-01-01T00:00:00Z     17.5     -72.5   NA
    -#> 9  2012-01-01T00:00:00Z     17.5     -72.5  0.3
    -#> 10 2012-02-01T00:00:00Z     17.5     -72.5   NA
    -#> ..                  ...      ...       ...  ...
    -
    -

    The output of griddap() is a list that you can explore further. Get the summary

    -
    res$summary
    -#> [1] "file ~/.rerddap/648ed11e8b911b65e39eb63c8df339df.nc has 3 dimensions:"
    -#> [1] "time   Size: 2"
    -#> [1] "latitude   Size: 3"
    -#> [1] "longitude   Size: 3"
    -#> [1] "------------------------"
    -#> [1] "file ~/.rerddap/648ed11e8b911b65e39eb63c8df339df.nc has 1 variables:"
    -#> [1] "float air[longitude,latitude,time]  Longname:CRUTEM3: Surface Air Temperature Monthly Anomaly Missval:-9.96920996838687e+36"
    -
    -

    Or get the dimension variables (just the names of the variables for brevity here)

    -
    names(res$summary$dim)
    -#> [1] "time"      "latitude"  "longitude"
    -
    -

    Get the data.frame (beware: you may want to just look at the head of the data.frame if large)

    -
    res$data
    -#>                    time latitude longitude   air
    -#> 1  2012-01-01T00:00:00Z     22.5     -77.5    NA
    -#> 2  2012-01-01T00:00:00Z     22.5     -77.5    NA
    -#> 3  2012-01-01T00:00:00Z     22.5     -77.5    NA
    -#> 4  2012-01-01T00:00:00Z     22.5     -77.5 -0.10
    -#> 5  2012-01-01T00:00:00Z     22.5     -77.5    NA
    -#> 6  2012-01-01T00:00:00Z     22.5     -77.5 -0.20
    -#> 7  2012-01-01T00:00:00Z     17.5     -72.5  0.20
    -#> 8  2012-01-01T00:00:00Z     17.5     -72.5    NA
    -#> 9  2012-01-01T00:00:00Z     17.5     -72.5  0.30
    -#> 10 2012-02-01T00:00:00Z     17.5     -72.5    NA
    -#> 11 2012-02-01T00:00:00Z     17.5     -72.5    NA
    -#> 12 2012-02-01T00:00:00Z     17.5     -72.5    NA
    -#> 13 2012-02-01T00:00:00Z     12.5     -67.5  0.40
    -#> 14 2012-02-01T00:00:00Z     12.5     -67.5    NA
    -#> 15 2012-02-01T00:00:00Z     12.5     -67.5  0.20
    -#> 16 2012-02-01T00:00:00Z     12.5     -67.5  0.00
    -#> 17 2012-02-01T00:00:00Z     12.5     -67.5    NA
    -#> 18 2012-02-01T00:00:00Z     12.5     -67.5  0.32
    -
    -

    You can actually still explore the original netcdf summary object, e.g.,

    -
    res$summary$dim$time
    -#> $name
    -#> [1] "time"
    -#> 
    -#> $len
    -#> [1] 2
    -#> 
    -#> $unlim
    -#> [1] FALSE
    -#> 
    -#> $id
    -#> [1] 1
    -#> 
    -#> $dimvarid
    -#> [1] 1
    -#> 
    -#> $units
    -#> [1] "seconds since 1970-01-01T00:00:00Z"
    -#> 
    -#> $vals
    -#> [1] 1325376000 1328054400
    -#> 
    -#> $create_dimvar
    -#> [1] TRUE
    -#> 
    -#> attr(,"class")
    -#> [1] "dim.ncdf"
    -
    -

    tabledap (tabular) data

    - -

    tabledap is data that is not gridded by lat/lon/time. In addition, the query interface is a bit different. Notice that you can do less than, more than, equal to type queries, but they are specified as character strings.

    -
    (out <- info('erdCalCOFIfshsiz'))
    -#> <ERDDAP info> erdCalCOFIfshsiz 
    -#>  Variables:  
    -#>      calcofi_species_code: 
    -#>          Range: 19, 1550 
    -#>      common_name: 
    -#>      cruise: 
    -#>      fish_1000m3: 
    -#>          Units: Fish per 1,000 cubic meters of water sampled 
    -#>      fish_count: 
    -#>      fish_size: 
    -...
    -
    (dat <- tabledap(out, 'time>=2001-07-07', 'time<=2001-07-10', 
    -                 fields = c('longitude', 'latitude', 'fish_size', 'itis_tsn', 'scientific_name')))
    -#> <ERDDAP tabledap> erdCalCOFIfshsiz
    -#>    Path: [~/.rerddap/f013f9ee09bdb4184928d533e575e948.csv]
    -#>    Last updated: [2015-05-09 08:31:21]
    -#>    File size:    [0.03 mb]
    -#>    Dimensions:   [558 X 5]
    -#> 
    -#>     longitude  latitude fish_size itis_tsn       scientific_name
    -#> 2     -118.26    33.255      22.9   623745 Nannobrachium ritteri
    -#> 3     -118.26    33.255      22.9   623745 Nannobrachium ritteri
    -#> 4  -118.10667 32.738335      31.5   623625  Lipolagus ochotensis
    -#> 5  -118.10667 32.738335      48.3   623625  Lipolagus ochotensis
    -#> 6  -118.10667 32.738335      15.5   162221 Argyropelecus sladeni
    -#> 7  -118.10667 32.738335      16.3   162221 Argyropelecus sladeni
    -#> 8  -118.10667 32.738335      17.8   162221 Argyropelecus sladeni
    -#> 9  -118.10667 32.738335      18.2   162221 Argyropelecus sladeni
    -#> 10 -118.10667 32.738335      19.2   162221 Argyropelecus sladeni
    -#> 11 -118.10667 32.738335      20.0   162221 Argyropelecus sladeni
    -#> ..        ...       ...       ...      ...                   ...
    -
    -

    Since both griddap() and tabledap() give back data.frame's, it's easy to do downstream manipulation. For example, we can use dplyr to filter, summarize, group, and sort:

    -
    library("dplyr")
    -dat$fish_size <- as.numeric(dat$fish_size)
    -df <- tbl_df(dat) %>% 
    -  filter(fish_size > 30) %>% 
    -  group_by(scientific_name) %>% 
    -  summarise(mean_size = mean(fish_size)) %>% 
    -  arrange(desc(mean_size))
    -df
    -#> Source: local data frame [20 x 2]
    -#> 
    -#>                 scientific_name mean_size
    -#> 1       Idiacanthus antrostomus 253.00000
    -#> 2            Stomias atriventer 189.25000
    -#> 3            Lestidiops ringens  98.70000
    -#> 4     Tarletonbeania crenularis  56.50000
    -#> 5      Ceratoscopelus townsendi  53.70000
    -#> 6     Stenobrachius leucopsarus  47.74538
    -#> 7               Sardinops sagax  47.00000
    -#> 8         Nannobrachium ritteri  43.30250
    -#> 9         Bathylagoides wesethi  43.09167
    -#> 10         Vinciguerria lucetia  42.00000
    -#> 11       Cyclothone acclinidens  40.80000
    -#> 12         Lipolagus ochotensis  39.72500
    -#> 13        Leuroglossus stilbius  38.35385
    -#> 14        Triphoturus mexicanus  38.21342
    -#> 15                Diaphus theta  37.88571
    -#> 16       Trachipterus altivelis  37.70000
    -#> 17 Symbolophorus californiensis  37.66000
    -#> 18         Nannobrachium regale  37.50000
    -#> 19         Merluccius productus  36.61333
    -#> 20        Argyropelecus sladeni  32.43333
    -
    -

    Then make a cute little plot

    -
    library("ggplot2")
    -ggplot(df, aes(reorder(scientific_name, mean_size), mean_size)) +
    -  geom_bar(stat = "identity") +
    -  coord_flip() + 
    -  theme_grey(base_size = 20) +
    -  labs(y = "Mean Size", x = "Species")
    -
    -

    plot of chunk unnamed-chunk-19

+The main function to get data from NCDC is `ncdc()`. `datasetid`, `startdate`, and `enddate` are required parameters. A quick example, here getting data from the GHCND dataset, from a particular station, and from Oct 1st 2013 to Dec 1st 2013: + + +```r +ncdc(datasetid = 'GHCND', stationid = 'GHCND:USW00014895', startdate = '2013-10-01', + enddate = '2013-12-01') +#> $meta +#> $meta$totalCount +#> [1] 697 +#> +#> $meta$pageCount +#> [1] 25 +#> +#> $meta$offset +#> [1] 1 +#> +#> +#> $data +#> Source: local data frame [25 x 8] +#> +#> date datatype station value fl_m fl_q fl_so +#> 1 2013-10-01T00:00:00 AWND GHCND:USW00014895 29 W +#> 2 2013-10-01T00:00:00 PRCP GHCND:USW00014895 0 W +#> 3 2013-10-01T00:00:00 SNOW GHCND:USW00014895 0 W +#> 4 2013-10-01T00:00:00 SNWD GHCND:USW00014895 0 W +#> 5 2013-10-01T00:00:00 TAVG GHCND:USW00014895 179 H S +#> 6 2013-10-01T00:00:00 TMAX GHCND:USW00014895 250 W +#> 7 2013-10-01T00:00:00 TMIN GHCND:USW00014895 133 W +#> 8 2013-10-01T00:00:00 WDF2 GHCND:USW00014895 210 W +#> 9 2013-10-01T00:00:00 WDF5 GHCND:USW00014895 230 W +#> 10 2013-10-01T00:00:00 WSF2 GHCND:USW00014895 76 W +#> .. ... ... ... ... ... ... ... +#> Variables not shown: fl_t (chr) +#> +#> attr(,"class") +#> [1] "ncdc_data" +``` + +You probably won't know what station you want data from offhand though, so you can first search for stations, in this example using a bounding box that defines a rectangular area near Seattle. + + +```r +library("lawn") +lawn_bbox_polygon(c(-122.2047, 47.5204, -122.1065, 47.6139)) %>% view +``` + +![lawnplot](/public/img/2015-07-07-weather-data-with-rnoaa/lawnplot.png) + +We'll search within that bounding box for weather stations. + + +```r +ncdc_stations(extent = c(47.5204, -122.2047, 47.6139, -122.1065)) +#> $meta +#> $meta$totalCount +#> [1] 9 +#> +#> $meta$pageCount +#> [1] 25 +#> +#> $meta$offset +#> [1] 1 +#> +#> +#> $data +#> Source: local data frame [9 x 9] +#> +#> elevation mindate maxdate latitude name +#> 1 199.6 2008-06-01 2015-06-29 47.5503 EASTGATE 1.7 SSW, WA US +#> 2 240.8 2010-05-01 2015-07-05 47.5604 EASTGATE 1.1 SW, WA US +#> 3 85.6 2008-07-01 2015-07-05 47.5916 BELLEVUE 0.8 S, WA US +#> 4 104.2 2008-06-01 2015-07-05 47.5211 NEWPORT HILLS 1.9 SSE, WA US +#> 5 58.5 2008-08-01 2009-04-12 47.6138 BELLEVUE 2.3 ENE, WA US +#> 6 199.9 2008-06-01 2009-11-22 47.5465 NEWPORT HILLS 1.4 E, WA US +#> 7 27.1 2008-07-01 2015-07-05 47.6046 BELLEVUE 1.8 W, WA US +#> 8 159.4 2008-11-01 2015-07-05 47.5694 BELLEVUE 2.3 SSE, WA US +#> 9 82.3 2008-12-01 2010-09-17 47.6095 BELLEVUE 0.6 NE, WA US +#> Variables not shown: datacoverage (dbl), id (chr), elevationUnit (chr), +#> longitude (dbl) +#> +#> attr(,"class") +#> [1] "ncdc_stations" +``` + +And there are 9 found. We could then use their station ids (e.g., `GHCND:US1WAKG0024`) to search for data using `ncdc()`, or search for what kind of data that station has with `ncdc_datasets()`, or other functions.
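+For example, a quick sketch of that follow-up (not run here; the date range is a placeholder):
+
+```r
+# what datasets does this station report to?
+ncdc_datasets(stationid = 'GHCND:US1WAKG0024')
+# then pull its daily data with ncdc(), as before (placeholder dates)
+ncdc(datasetid = 'GHCND', stationid = 'GHCND:US1WAKG0024',
+     startdate = '2015-01-01', enddate = '2015-03-01')
+```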
+ +## GHCND + +* GHCND = Global Historical Climatology Network Daily (Data) +* Data comes from an FTP server + + +```r +library("dplyr") +dat <- ghcnd(stationid = "AGE00147704") +dat$data %>% + filter(element == "PRCP", year == 1909) +#> id year month element VALUE1 MFLAG1 QFLAG1 SFLAG1 VALUE2 MFLAG2 +#> 1 AGE00147704 1909 11 PRCP -9999 NA -9999 NA +#> 2 AGE00147704 1909 12 PRCP 23 NA E 0 NA +#> QFLAG2 SFLAG2 VALUE3 MFLAG3 QFLAG3 SFLAG3 VALUE4 MFLAG4 QFLAG4 SFLAG4 +#> 1 -9999 NA -9999 NA +#> 2 E 0 NA E 0 NA E +#> VALUE5 MFLAG5 QFLAG5 SFLAG5 VALUE6 MFLAG6 QFLAG6 SFLAG6 VALUE7 MFLAG7 +#> 1 -9999 NA -9999 NA -9999 NA +#> 2 0 NA E 0 NA E 0 NA +#> QFLAG7 SFLAG7 VALUE8 MFLAG8 QFLAG8 SFLAG8 VALUE9 MFLAG9 QFLAG9 SFLAG9 +#> 1 NA -9999 NA -9999 NA +#> 2 NA E 250 NA E 75 NA E +#> VALUE10 MFLAG10 QFLAG10 SFLAG10 VALUE11 MFLAG11 QFLAG11 SFLAG11 VALUE12 +#> 1 -9999 NA -9999 NA -9999 +#> 2 131 NA E 0 NA E 0 +#> MFLAG12 QFLAG12 SFLAG12 VALUE13 MFLAG13 QFLAG13 SFLAG13 VALUE14 MFLAG14 +#> 1 NA -9999 NA -9999 NA +#> 2 NA E 0 NA E 0 NA +#> QFLAG14 SFLAG14 VALUE15 MFLAG15 QFLAG15 SFLAG15 VALUE16 MFLAG16 QFLAG16 +#> 1 -9999 NA -9999 NA +#> 2 E 0 NA E 0 NA +#> SFLAG16 VALUE17 MFLAG17 QFLAG17 SFLAG17 VALUE18 MFLAG18 QFLAG18 SFLAG18 +#> 1 -9999 NA -9999 NA +#> 2 E 0 NA E 0 NA E +#> VALUE19 MFLAG19 QFLAG19 SFLAG19 VALUE20 MFLAG20 QFLAG20 SFLAG20 VALUE21 +#> 1 -9999 NA NA -9999 NA NA -9999 +#> 2 0 NA NA E 0 NA NA E 0 +#> MFLAG21 QFLAG21 SFLAG21 VALUE22 MFLAG22 QFLAG22 SFLAG22 VALUE23 MFLAG23 +#> 1 NA -9999 NA 22 NA +#> 2 NA E 0 NA E 0 NA +#> QFLAG23 SFLAG23 VALUE24 MFLAG24 QFLAG24 SFLAG24 VALUE25 MFLAG25 QFLAG25 +#> 1 NA E 9 NA NA E 5 NA NA +#> 2 NA E 0 NA NA E 0 NA NA +#> SFLAG25 VALUE26 MFLAG26 QFLAG26 SFLAG26 VALUE27 MFLAG27 QFLAG27 SFLAG27 +#> 1 E 0 NA E 86 NA NA E +#> 2 E 0 NA E 0 NA NA E +#> VALUE28 MFLAG28 QFLAG28 SFLAG28 VALUE29 MFLAG29 QFLAG29 SFLAG29 VALUE30 +#> 1 0 NA NA E 28 NA NA E 0 +#> 2 0 NA NA E 0 NA NA E 0 +#> MFLAG30 QFLAG30 SFLAG30 VALUE31 MFLAG31 QFLAG31 SFLAG31 +#> 1 NA E -9999 NA NA +#> 2 NA E 57 NA NA E +``` + +You can also get to datasets by searching by station id, date min, date max, and variable. E.g. + + +```r +ghcnd_search("AGE00147704", var = "PRCP") +#> $prcp +#> Source: local data frame [9,803 x 6] +#> +#> id prcp date mflag qflag sflag +#> 1 AGE00147704 -9999 1909-11-01 NA +#> 2 AGE00147704 23 1909-12-01 NA E +#> 3 AGE00147704 81 1910-01-01 NA E +#> 4 AGE00147704 0 1910-02-01 NA E +#> 5 AGE00147704 18 1910-03-01 NA E +#> 6 AGE00147704 0 1910-04-01 NA E +#> 7 AGE00147704 223 1910-05-01 NA E +#> 8 AGE00147704 0 1910-06-01 NA E +#> 9 AGE00147704 0 1910-07-01 NA E +#> 10 AGE00147704 0 1910-08-01 NA E +#> .. ... ... ... ... ... ... 
+``` + +## ISD + +* ISD = Integrated Surface Database +* Data comes from an FTP server + +You'll likely first want to run `isd_stations()` to get list of stations + + +```r +stations <- isd_stations() +head(stations) +#> usaf wban station_name ctry state icao lat lon elev_m begin end +#> 1 7005 99999 CWOS 07005 NA NA NA 20120127 20120127 +#> 2 7011 99999 CWOS 07011 NA NA NA 20111025 20121129 +#> 3 7018 99999 WXPOD 7018 0 0 7018 20110309 20130730 +#> 4 7025 99999 CWOS 07025 NA NA NA 20120127 20120127 +#> 5 7026 99999 WXPOD 7026 AF 0 0 7026 20120713 20141120 +#> 6 7034 99999 CWOS 07034 NA NA NA 20121024 20121106 +``` + +Then get data from particular stations, like + + +```r +(res <- isd(usaf = "011490", wban = "99999", year = 1986)) +#> +#> Size: 1328 X 85 +#> +#> total_chars usaf_station wban_station date time date_flag latitude +#> 1 50 11490 99999 19860101 0 4 66267 +#> 2 123 11490 99999 19860101 600 4 66267 +#> 3 50 11490 99999 19860101 1200 4 66267 +#> 4 94 11490 99999 19860101 1800 4 66267 +#> 5 50 11490 99999 19860102 0 4 66267 +#> 6 123 11490 99999 19860102 600 4 66267 +#> 7 50 11490 99999 19860102 1200 4 66267 +#> 8 94 11490 99999 19860102 1800 4 66267 +#> 9 50 11490 99999 19860103 0 4 66267 +#> 10 123 11490 99999 19860103 600 4 66267 +#> .. ... ... ... ... ... ... ... +#> Variables not shown: longitude (int), type_code (chr), elevation (int), +#> call_letter (int), quality (chr), wind_direction (int), +#> wind_direction_quality (int), wind_code (chr), wind_speed (int), +#> wind_speed_quality (int), ceiling_height (int), +#> ceiling_height_quality (int), ceiling_height_determination (chr), +#> ceiling_height_cavok (chr), visibility_distance (int), +#> visibility_distance_quality (int), visibility_code (chr), +#> visibility_code_quality (int), temperature (int), temperature_quality +#> (int), temperature_dewpoint (int), temperature_dewpoint_quality +#> (int), air_pressure (int), air_pressure_quality (int), +#> AG1.precipitation (chr), AG1.discrepancy (int), AG1.est_water_depth +#> (int), GF1.sky_condition (chr), GF1.coverage (int), +#> GF1.opaque_coverage (int), GF1.coverage_quality (int), +#> GF1.lowest_cover (int), GF1.lowest_cover_quality (int), +#> GF1.low_cloud_genus (int), GF1.low_cloud_genus_quality (int), +#> GF1.lowest_cloud_base_height (int), +#> GF1.lowest_cloud_base_height_quality (int), GF1.mid_cloud_genus +#> (int), GF1.mid_cloud_genus_quality (int), GF1.high_cloud_genus (int), +#> GF1.high_cloud_genus_quality (int), MD1.atmospheric_change (chr), +#> MD1.tendency (int), MD1.tendency_quality (int), MD1.three_hr (int), +#> MD1.three_hr_quality (int), MD1.twentyfour_hr (int), +#> MD1.twentyfour_hr_quality (int), REM.remarks (chr), REM.identifier +#> (chr), REM.length_quantity (int), REM.comment (chr), KA1.extreme_temp +#> (chr), KA1.period_quantity (int), KA1.max_min (chr), KA1.temp (int), +#> KA1.temp_quality (int), AY1.manual_occurrence (chr), +#> AY1.condition_code (int), AY1.condition_quality (int), AY1.period +#> (int), AY1.period_quality (int), AY2.manual_occurrence (chr), +#> AY2.condition_code (int), AY2.condition_quality (int), AY2.period +#> (int), AY2.period_quality (int), MW1.first_weather_reported (chr), +#> MW1.condition (int), MW1.condition_quality (int), +#> EQD.observation_identifier (chr), EQD.observation_text (int), +#> EQD.reason_code (int), EQD.parameter (chr), +#> EQD.observation_identifier.1 (chr), EQD.observation_text.1 (int), +#> EQD.reason_code.1 (int), EQD.parameter.1 (chr) +``` + +## Severe weather + +* SWDI = Severe Weather Data Inventory +* 
From the SWDI site + +> The Severe Weather Data Inventory (SWDI) is an integrated database of severe weather records for the United States. The records in SWDI come from a variety of sources in the NCDC archive. + +The `swdi()` function allows you to get data in xml, csv, shp, or kmz format. You can get data from many different datasets: + +* nx3tvs NEXRAD Level-3 Tornado Vortex Signatures (point) +* nx3meso NEXRAD Level-3 Mesocyclone Signatures (point) +* nx3hail NEXRAD Level-3 Hail Signatures (point) +* nx3structure NEXRAD Level-3 Storm Cell Structure Information (point) +* plsr Preliminary Local Storm Reports (point) +* warn Severe Thunderstorm, Tornado, Flash Flood and Special Marine warnings (polygon) +* nldn Lightning strikes from Vaisala (.gov and .mil ONLY) (point) + +An example: Get all `plsr` within the bounding box (-91,30,-90,31) + + +```r +swdi(dataset = 'plsr', startdate = '20060505', enddate = '20060510', +bbox = c(-91, 30, -90, 31)) +#> $meta +#> $meta$totalCount +#> numeric(0) +#> +#> $meta$totalTimeInSeconds +#> [1] 0 +#> +#> +#> $data +#> Source: local data frame [5 x 8] +#> +#> ztime id event magnitude city +#> 1 2006-05-09T02:20:00Z 427540 HAIL 1 5 E KENTWOOD +#> 2 2006-05-09T02:40:00Z 427536 HAIL 1 MOUNT HERMAN +#> 3 2006-05-09T02:40:00Z 427537 TSTM WND DMG -9999 MOUNT HERMAN +#> 4 2006-05-09T03:00:00Z 427199 HAIL 0 FRANKLINTON +#> 5 2006-05-09T03:17:00Z 427200 TORNADO -9999 5 S FRANKLINTON +#> Variables not shown: county (chr), state (chr), source (chr) +#> +#> $shape +#> shape +#> 1 POINT (-90.43 30.93) +#> 2 POINT (-90.3 30.96) +#> 3 POINT (-90.3 30.96) +#> 4 POINT (-90.14 30.85) +#> 5 POINT (-90.14 30.78) +#> +#> attr(,"class") +#> [1] "swdi" +``` + +## Sea ice + +The `seaice()` function simply grabs shape files that describe sea ice cover at the North and South poles, and can be useful for examining change through time in sea ice cover, among other things. + +An example: Plot sea ice cover for April 1990 for the North pole. + + +```r +urls <- seaiceeurls(mo = 'Apr', pole = 'N', yr = 1990) +out <- seaice(urls) + +library('ggplot2') +ggplot(out, aes(long, lat, group = group)) + + geom_polygon(fill = "steelblue") + + theme_ice() +``` + +![plot of chunk unnamed-chunk-13](/public/img/2015-07-07-weather-data-with-rnoaa/unnamed-chunk-13-1.png) + +## Buoys + +* Get NOAA buoy data from the National Data Buoy Center + +Using buoy data requires the `ncdf` package. Make sure you have that installed, like `install.packages("ncdf")`. `buoy()` and `buoys()` will fail if you don't have `ncdf` installed.
+ +`buoys()` - Get available buoys given a dataset name + + +```r +head(buoys(dataset = 'cwind')) +#> id +#> 1 41001 +#> 2 41002 +#> 3 41004 +#> 4 41006 +#> 5 41008 +#> 6 41009 +#> url +#> 1 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41001/catalog.html +#> 2 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41002/catalog.html +#> 3 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41004/catalog.html +#> 4 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41006/catalog.html +#> 5 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41008/catalog.html +#> 6 http://dods.ndbc.noaa.gov/thredds/catalog/data/cwind/41009/catalog.html +``` + +`buoy()` - Get data for a buoy - if no year or datatype specified, we get the first file + + +```r +buoy(dataset = 'cwind', buoyid = 46085) +#> Dimensions (rows/cols): [33486 X 5] +#> 2 variables: [wind_dir, wind_spd] +#> +#> time latitude longitude wind_dir wind_spd +#> 1 2007-05-05T02:00:00Z 55.855 -142.559 331 2.8 +#> 2 2007-05-05T02:10:00Z 55.855 -142.559 328 2.6 +#> 3 2007-05-05T02:20:00Z 55.855 -142.559 329 2.2 +#> 4 2007-05-05T02:30:00Z 55.855 -142.559 356 2.1 +#> 5 2007-05-05T02:40:00Z 55.855 -142.559 360 1.5 +#> 6 2007-05-05T02:50:00Z 55.855 -142.559 10 1.9 +#> 7 2007-05-05T03:00:00Z 55.855 -142.559 10 2.2 +#> 8 2007-05-05T03:10:00Z 55.855 -142.559 14 2.2 +#> 9 2007-05-05T03:20:00Z 55.855 -142.559 16 2.1 +#> 10 2007-05-05T03:30:00Z 55.855 -142.559 22 1.6 +#> .. ... ... ... ... ... +``` + +## Tornadoes + +The function `tornadoes()` gets tornado data from [http://www.spc.noaa.gov/gis/svrgis/](http://www.spc.noaa.gov/gis/svrgis/). + + +```r +shp <- tornadoes() +library('sp') +plot(shp) +``` + +![tornadoes](/public/img/2015-07-07-weather-data-with-rnoaa/tornadoes.png) + +## Historical Observing Metadata Repository + +* HOMR = Historical Observing Metadata Repository +* Data from RESTful API at [http://www.ncdc.noaa.gov/homr/api](http://www.ncdc.noaa.gov/homr/api) + +`homr_definitions()` gets you definitions and metadata for datasets + + +```r +head(homr_definitions()) +#> Source: local data frame [6 x 7] +#> +#> defType abbr fullName displayName +#> 1 ids GHCND GHCND IDENTIFIER GHCND ID +#> 2 ids COOP COOP NUMBER COOP ID +#> 3 ids WBAN WBAN NUMBER WBAN ID +#> 4 ids FAA FAA LOCATION IDENTIFIER FAA ID +#> 5 ids ICAO ICAO ID ICAO ID +#> 6 ids TRANS TRANSMITTAL ID Transmittal ID +#> Variables not shown: description (chr), cssaName (chr), ghcndName (chr) +``` + +`homr()` gets you metadata for stations given query parameters. 
In this example, search for data for the state of Delaware + + +```r +res <- homr(state = 'DE') +names(res) # the stations +#> [1] "10001871" "10100162" "10100164" "10100166" "20004155" "20004158" +#> [7] "20004160" "20004162" "20004163" "20004168" "20004171" "20004176" +#> [13] "20004178" "20004179" "20004180" "20004182" "20004184" "20004185" +#> [19] "30001831" "30017384" "30020917" "30021161" "30021998" "30022674" +#> [25] "30026770" "30027455" "30032423" "30032685" "30034222" "30039554" +#> [31] "30043742" "30046662" "30046814" "30051475" "30057217" "30063570" +#> [37] "30064900" "30065901" "30067636" "30069663" "30075067" "30077378" +#> [43] "30077857" "30077923" "30077988" "30079088" "30079240" "30082430" +#> [49] "30084216" "30084262" "30084537" "30084796" "30094582" "30094639" +#> [55] "30094664" "30094670" "30094683" "30094730" "30094806" "30094830" +#> [61] "30094917" "30094931" "30094936" "30094991" +``` + +You can index to each one to get more data + +## Storms + +* Data from: International Best Track Archive for Climate Stewardship (IBTrACS) +* Data comes from an FTP server + +Flat files (csv's) are served up as well as shp files. In this example, plot storm data for the year 1940 + + +```r +(res3 <- storm_shp(year = 1940)) +#> +#> Path: ~/.rnoaa/storms/year/Year.1940.ibtracs_all_points.v03r06.shp +#> Basin: +#> Storm: +#> Year: 1940 +#> Type: points +res3shp <- storm_shp_read(res3) +sp::plot(res3shp) +``` + +![plot of chunk unnamed-chunk-19](/public/img/2015-07-07-weather-data-with-rnoaa/unnamed-chunk-19-1.png)
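Going back to the HOMR example above: `res` behaves like a list keyed by station id (that is what `names(res)` printed), so ordinary list indexing pulls out a single station's metadata. A minimal sketch only -- the id is taken from the output above, and the fields inside each element are not shown here:

```r
one <- res[["10001871"]]   # any id from names(res) works
names(one)                 # see which metadata sections this station has
```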
    diff --git a/_site/page30/index.html b/_site/page30/index.html index 900e1277dc..14ccb30159 100644 --- a/_site/page30/index.html +++ b/_site/page30/index.html @@ -59,6 +59,208 @@

    Recology

      +
    +

    + + Making matrices with zeros and ones + +

    + + + + ********* + +## So I was trying to figure out a fast way to make matrices with randomly allocated 0 or 1 in each cell of the matrix. I reached out on Twitter, and got many responses (thanks tweeps!). + +********* + +### Here is the solution I came up with. See if you can tell why it would be slow. + +{% highlight r linenos %} +mm <- matrix(0, 10, 5) +apply(mm, c(1, 2), function(x) sample(c(0, 1), 1)) +{% endhighlight %} + + + +{% highlight text %} + [,1] [,2] [,3] [,4] [,5] + [1,] 1 0 1 0 1 + [2,] 0 0 1 1 1 + [3,] 0 0 0 0 1 + [4,] 0 1 1 0 1 + [5,] 0 1 1 1 1 + [6,] 1 0 1 1 1 + [7,] 0 1 0 1 0 + [8,] 0 0 1 0 1 + [9,] 1 0 1 1 1 +[10,] 1 0 0 1 1 +{% endhighlight %} + + +********* + +### Ted Hart (@distribecology) replied first with: + +{% highlight r linenos %} +matrix(rbinom(10 * 5, 1, 0.5), ncol = 5, nrow = 10) +{% endhighlight %} + + + +{% highlight text %} + [,1] [,2] [,3] [,4] [,5] + [1,] 1 1 0 1 1 + [2,] 1 0 0 1 0 + [3,] 0 1 0 0 0 + [4,] 0 0 1 0 0 + [5,] 1 0 1 0 0 + [6,] 0 0 0 0 1 + [7,] 1 0 0 0 0 + [8,] 0 1 0 1 0 + [9,] 1 1 1 1 0 +[10,] 0 1 1 0 0 +{% endhighlight %} + + +********* + + +### Next, David Smith (@revodavid) and Rafael Maia (@hylospar) came up with about the same solution. + +{% highlight r linenos %} +m <- 10 +n <- 5 +matrix(sample(0:1, m * n, replace = TRUE), m, n) +{% endhighlight %} + + + +{% highlight text %} + [,1] [,2] [,3] [,4] [,5] + [1,] 0 0 0 0 1 + [2,] 0 0 0 0 0 + [3,] 0 1 1 0 1 + [4,] 1 0 0 1 0 + [5,] 0 0 0 0 1 + [6,] 1 0 1 1 1 + [7,] 1 1 1 1 0 + [8,] 0 0 0 1 1 + [9,] 1 0 0 0 1 +[10,] 0 1 0 1 1 +{% endhighlight %} + + +********* + + +### Then there was the solution by Luis Apiolaza (@zentree). + +{% highlight r linenos %} +m <- 10 +n <- 5 +round(matrix(runif(m * n), m, n)) +{% endhighlight %} + + + +{% highlight text %} + [,1] [,2] [,3] [,4] [,5] + [1,] 0 1 1 0 0 + [2,] 1 0 1 1 0 + [3,] 1 0 1 0 0 + [4,] 1 0 0 0 1 + [5,] 1 0 1 1 0 + [6,] 1 0 0 0 0 + [7,] 1 0 0 0 0 + [8,] 1 1 1 0 0 + [9,] 0 0 0 0 1 +[10,] 1 0 0 1 1 +{% endhighlight %} + + +********* + +### Last, a solution was proposed using `RcppArmadillo`, but I couldn't get it to work on my machine, but here is the function anyway if someone can. + +{% highlight r linenos %} +library(inline) +library(RcppArmadillo) +f <- cxxfunction(body = "return wrap(arma::randu(5,10));", plugin = "RcppArmadillo") +{% endhighlight %} + + +********* + +### And here is the comparison of system.time for each solution. 
+ +{% highlight r linenos %} +mm <- matrix(0, 10, 5) +m <- 10 +n <- 5 + +system.time(replicate(1000, apply(mm, c(1, 2), function(x) sample(c(0, 1), 1)))) # @recology_ +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 0.470 0.002 0.471 +{% endhighlight %} + + + +{% highlight r linenos %} +system.time(replicate(1000, matrix(rbinom(10 * 5, 1, 0.5), ncol = 5, nrow = 10))) # @distribecology +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 0.014 0.000 0.015 +{% endhighlight %} + + + +{% highlight r linenos %} +system.time(replicate(1000, matrix(sample(0:1, m * n, replace = TRUE), m, n))) # @revodavid & @hylospar +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 0.015 0.000 0.014 +{% endhighlight %} + + + +{% highlight r linenos %} +system.time(replicate(1000, round(matrix(runif(m * n), m, n)), )) # @zentree +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 0.014 0.000 0.014 +{% endhighlight %} + + +### If you want to take the time to learn C++ or already know it, the RcppArmadillo option would likely be the fastest, but I think (IMO) for many scientists, especially ecologists, we probably don't already know C++, so will stick to the next fastest options. + +********* + +### Get the .Rmd file used to create this post [at my github account](https://github.com/sckott/sckott.github.com/tree/master/_drafts/2012-08-30-making-matrices.Rmd). + +********* + +### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and nice knitr highlighting/etc. in in [RStudio](http://rstudio.org/). + +
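*********

One more note on the `RcppArmadillo` option mentioned above: since that snippet didn't run for me, here is a rough sketch of the same idea using plain `Rcpp` instead. This is my own guess at an equivalent, not something anyone tweeted, and it assumes you have Rcpp and a working compiler. `microbenchmark` would also give a fairer timing comparison than `system.time(replicate(...))`, if you want to race it against the options above.

{% highlight r %}
library(Rcpp)
# compile a small C++ function on the fly; runif() here is Rcpp sugar
cppFunction('
IntegerMatrix rand01(int m, int n) {
  NumericVector u = runif(m * n);
  IntegerMatrix out(m, n);
  for (int i = 0; i < m * n; i++) out[i] = u[i] < 0.5 ? 1 : 0;
  return out;
}
')
rand01(10, 5)
{% endhighlight %}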
    +

    @@ -68,98 +270,136 @@

    -
    -

    UPDATE: changed data source so that the entire example can be run by anyone on their own machine. Also, per Joachim's suggestion, I put a box around the blown up area of the map. In addition, rgeos and maptools removed, not needed.

    -
    - -
    - -

    Here's a quick demo of creating a map with an inset within it using ggplot. The inset is achieved using the gridExtra package.

    - -

    Install libraries

    -
    install.packages(c("ggplot2", "maps", "grid", "gridExtra"))
    -
    library("ggplot2")
    -library("maps")
    -library("grid")
    -library("gridExtra")
    -
    -

    Create a data frame

    -
    dat <- data.frame(ecosystem = rep(c("oak", "steppe", "prairie"), each = 8),
    -    lat = rnorm(24, mean = 51, sd = 1), lon = rnorm(24, mean = -113, sd = 5))
    -head(dat)
    -#>   ecosystem      lat       lon
    -#> 1       oak 49.58285 -107.6930
    -#> 2       oak 52.58942 -116.6920
    -#> 3       oak 50.49277 -114.5542
    -#> 4       oak 50.05943 -116.5660
    -#> 5       oak 51.76492 -112.1457
    -#> 6       oak 52.82153 -112.8858
    -
    -

    Get maps using the maps library

    - -

    Get a map of Canada

    -
    canadamap <- data.frame(map("world", "Canada", plot = FALSE)[c("x", "y")])
    -
    -

    Get a map of smaller extent

    -
    canadamapsmall <- canadamap[canadamap$x < -90 & canadamap$y < 54, ]
    -canadamapsmall_ <- na.omit(canadamapsmall)
    -
    -

    This should get your corner points for the box, picking min and max of lat and lon

    -
    (insetrect <- data.frame(xmin = min(canadamapsmall_$x), xmax = max(canadamapsmall_$x),
    -    ymin = min(canadamapsmall_$y), ymax = max(canadamapsmall_$y)))
    -#>        xmin      xmax     ymin     ymax
    -#> 1 -133.0975 -90.38942 48.04721 53.99915
    -
    -

    Make the maps

    - -

    Create a theme to be used by both plots

    -
    ptheme <- theme(
    -  panel.border = element_rect(colour = 'black', size = 1, linetype = 1),
    -  panel.grid.major = element_blank(), 
    -  panel.grid.minor = element_blank(),
    -  panel.background = element_rect(fill = 'white'),
    -  legend.key = element_blank()
    -)
    -
    -

    The inset map, all of Canada

    -
    a <- ggplot(canadamap) +
    -    theme_bw(base_size = 22) +
    -    geom_path(data = canadamap, aes(x, y), colour = "black", fill = "white") +
    -    geom_rect(data = insetrect, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), alpha = 0, colour = "blue", size = 1, linetype = 1) +
    -  ptheme %+% theme(
    -    legend.position = c(0.15, 0.80),
    -    axis.ticks = element_blank(), 
    -    axis.text.x = element_blank(),
    -    axis.text.y = element_blank()
    -  ) +
    -    labs(x = '', y = '')
    -
    -

    The larger map, zoomed in, with the data

    -
    b <- ggplot(dat, aes(lon, lat, colour = ecosystem)) +
    -    theme_bw(base_size = 22) +
    -    geom_jitter(size = 4, alpha = 0.6) +
    -    geom_path(data = canadamapsmall, aes(x, y), colour = "black", fill = "white") +
    -    scale_size(guide = "none") +
    -  ptheme %+% theme(
    -    legend.position = c(0.1, 0.20),
    -    legend.text = element_text(size = 12, face = 'bold'),
    -    legend.title = element_text(size = 12, face = 'bold'), 
    -    axis.ticks = element_line(size = 2)
    -  ) +
    -    labs(x = '', y = '')
    -
    -

    Print maps

    - -

    One an inset of the other. This approach uses the gridExtra package for flexible alignment, etc. of ggplot graphs.

    -
    grid.newpage()
    -vpb_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.5)  # the larger map
    -vpa_ <- viewport(width = 0.4, height = 0.4, x = 0.8, y = 0.8)  # the inset in upper right
    -print(b, vp = vpb_)
    -print(a, vp = vpa_)
    -
    -

    plot of chunk unnamed-chunk-11

    - -

    Written in Markdown, with help from knitr, and nice knitr highlighting/etc. in in RStudio.

    + > UPDATE: changed data source so that the entire example can be run by anyone on their own machine. Also, per Joachim's suggestion, I put a box around the blown up area of the map. In addition, rgeos and maptools removed, not needed. + +********* + +Here's a quick demo of creating a map with an inset within it using ggplot. The inset is achieved using the `gridExtra` package. + +### Install libraries + + +```r +install.packages(c("ggplot2", "maps", "grid", "gridExtra")) +``` + + +```r +library("ggplot2") +library("maps") +library("grid") +library("gridExtra") +``` + +### Create a data frame + + +```r +dat <- data.frame(ecosystem = rep(c("oak", "steppe", "prairie"), each = 8), + lat = rnorm(24, mean = 51, sd = 1), lon = rnorm(24, mean = -113, sd = 5)) +head(dat) +#> ecosystem lat lon +#> 1 oak 49.58285 -107.6930 +#> 2 oak 52.58942 -116.6920 +#> 3 oak 50.49277 -114.5542 +#> 4 oak 50.05943 -116.5660 +#> 5 oak 51.76492 -112.1457 +#> 6 oak 52.82153 -112.8858 +``` + +### Get maps using the maps library + +Get a map of Canada + + +```r +canadamap <- data.frame(map("world", "Canada", plot = FALSE)[c("x", "y")]) +``` + +Get a map of smaller extent + + +```r +canadamapsmall <- canadamap[canadamap$x < -90 & canadamap$y < 54, ] +canadamapsmall_ <- na.omit(canadamapsmall) +``` + +This should get your corner points for the box, picking min and max of lat and lon + + +```r +(insetrect <- data.frame(xmin = min(canadamapsmall_$x), xmax = max(canadamapsmall_$x), + ymin = min(canadamapsmall_$y), ymax = max(canadamapsmall_$y))) +#> xmin xmax ymin ymax +#> 1 -133.0975 -90.38942 48.04721 53.99915 +``` + +### Make the maps + +Create a theme to be used by both plots + + +```r +ptheme <- theme( + panel.border = element_rect(colour = 'black', size = 1, linetype = 1), + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_rect(fill = 'white'), + legend.key = element_blank() +) +``` + +The inset map, all of Canada + + +```r +a <- ggplot(canadamap) + + theme_bw(base_size = 22) + + geom_path(data = canadamap, aes(x, y), colour = "black", fill = "white") + + geom_rect(data = insetrect, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), alpha = 0, colour = "blue", size = 1, linetype = 1) + + ptheme %+% theme( + legend.position = c(0.15, 0.80), + axis.ticks = element_blank(), + axis.text.x = element_blank(), + axis.text.y = element_blank() + ) + + labs(x = '', y = '') +``` + +The larger map, zoomed in, with the data + + +```r +b <- ggplot(dat, aes(lon, lat, colour = ecosystem)) + + theme_bw(base_size = 22) + + geom_jitter(size = 4, alpha = 0.6) + + geom_path(data = canadamapsmall, aes(x, y), colour = "black", fill = "white") + + scale_size(guide = "none") + + ptheme %+% theme( + legend.position = c(0.1, 0.20), + legend.text = element_text(size = 12, face = 'bold'), + legend.title = element_text(size = 12, face = 'bold'), + axis.ticks = element_line(size = 2) + ) + + labs(x = '', y = '') +``` + +### Print maps + +One an inset of the other. This approach uses the `gridExtra` package for flexible alignment, etc. of ggplot graphs. 
+ + +```r +grid.newpage() +vpb_ <- viewport(width = 1, height = 1, x = 0.5, y = 0.5) # the larger map +vpa_ <- viewport(width = 0.4, height = 0.4, x = 0.8, y = 0.8) # the inset in upper right +print(b, vp = vpb_) +print(a, vp = vpa_) +``` + +![plot of chunk unnamed-chunk-11](/public/img/2012-08-22-ggplot-inset-map/unnamed-chunk-11-1.png) + +Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and nice knitr highlighting/etc. in in [RStudio](http://rstudio.org/).
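A side note, not part of the original workflow: if you would rather stay inside ggplot2, `annotation_custom()` can drop the small map onto the larger one as a grob. The corner coordinates below are illustrative guesses in data (lon/lat) units, so adjust to taste.

```r
# place plot `a` as an inset in the upper right corner of plot `b`
b + annotation_custom(ggplotGrob(a), xmin = -100, xmax = -90, ymin = 51, ymax = 54)
```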

    @@ -172,185 +412,146 @@

    -

    Example of using the Global Names Resolver API to check species names

    + ## Example of using the Global Names Resolver API to check species names + +********* + +There are a number of options for resolution of taxonomic names. The [Taxonomic Name Resolution Service (TNRS)](http://tnrs.iplantcollaborative.org/) comes to mind. There is a new service for taxonomic name resolution called the [Global Names Resolver](http://resolver.globalnames.org/). They describe the service thusly "_Resolve lists of scientific names against known sources. This service parses incoming names, executes exact or fuzzy matching as required, and displays a confidence score for each match along with its identifier._".
    +********* -

    There are a number of options for resolution of taxonomic names. The Taxonomic Name Resolution Service (TNRS) comes to mind. There is a new service for taxonomic name resoultion called the Global Names Resolver. They describe the service thusly "Resolve lists of scientific names against known sources. This service parses incoming names, executes exact or fuzzy matching as required, and displays a confidence score for each match along with its identifier.".

    +## Load required packages -
    +### Just uncomment the code to use. -

    Load required packages

    -

    Just uncomment the code to use.

    +{% highlight r linenos %} +# If you don't have them already +# install.packages(c('RJSONIO','plyr','devtools')) require(devtools) +# install_github('taxize_','ropensci') +library(RJSONIO) +library(plyr) +library(taxize) +{% endhighlight %} -
    1 # If you don't have them already
    -2 # install.packages(c('RJSONIO','plyr','devtools')) require(devtools)
    -3 # install_github('taxize_','ropensci')
    -4 library(RJSONIO)
    -5 library(plyr)
    -6 library(taxize)
    -

    Get the data sources available

    +## Get the data sources available -

    Get just id's and names of sources in a data.frame

    +### Get just id's and names of sources in a data.frame -
    1 tail(gnr_datasources(todf = T))
    +{% highlight r linenos %} +tail(gnr_datasources(todf = T)) +{% endhighlight %} -
    ##     id                                title
    +
    +
    +{% highlight text %}
    +##     id                                title
     ## 82 164                            BioLib.cz
     ## 83 165 Tropicos - Missouri Botanical Garden
     ## 84 166                                nlbif
     ## 85 167                                 IPNI
     ## 86 168              Index to Organism Names
    -## 87 169                        uBio NameBank
    +## 87 169 uBio NameBank +{% endhighlight %} + + +********* -
    +### Give me the id for EOL (Encyclopedia of Life) -

    Give me the id for EOL (Encyclopedia of Life)

    +{% highlight r linenos %} +out <- gnr_datasources(todf = T) +out[out$title == "EOL", "id"] +{% endhighlight %} -
    1 out <- gnr_datasources(todf = T)
    -2 out[out$title == "EOL", "id"]
    -
    ## [1] 12
    -
    +{% highlight text %} +## [1] 12 +{% endhighlight %} -

    Fuzzy search for sources with the word "zoo"

    -
    1 out <- gnr_datasources(todf = T)
    -2 outdf <- out[agrep("zoo", out$title, ignore.case = T), ]
    -3 outdf[1:2, ]
    +********* -
    ##     id             title
    +### Fuzzy search for sources with the word "zoo"
    +
    +{% highlight r linenos %}
    +out <- gnr_datasources(todf = T)
    +outdf <- out[agrep("zoo", out$title, ignore.case = T), ]
    +outdf[1:2, ]
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +##     id             title
     ## 20 100 Mushroom Observer
    -## 25 105           ZooKeys
    +## 25 105 ZooKeys +{% endhighlight %} -

    Resolve some names

    -

    Search for Helianthus annuus and Homo sapiens, return a data.frame

    +## Resolve some names -
    1 gnr(names = c("Helianthus annuus", "Homo sapiens"), returndf = TRUE)[1:2, ]
    +### Search for _Helianthus annuus_ and _Homo sapiens_, return a data.frame -
    ##   data_source_id    submitted_name       name_string score    title
    +{% highlight r linenos %}
    +gnr(names = c("Helianthus annuus", "Homo sapiens"), returndf = TRUE)[1:2, ]
    +{% endhighlight %}
    +
    +
    +
    +{% highlight text %}
    +##   data_source_id    submitted_name       name_string score    title
     ## 1              4 Helianthus annuus Helianthus annuus 0.988     NCBI
    -## 3             10 Helianthus annuus Helianthus annuus 0.988 Freebase
    +## 3 10 Helianthus annuus Helianthus annuus 0.988 Freebase +{% endhighlight %} + -
    +********* -

    Search for the same species, with only using data source 12 (i.e., EOL)

    +### Search for the same species, with only using data source 12 (i.e., EOL) -
    1 gnr(names = c("Helianthus annuus", "Homo sapiens"), data_source_ids = "12", 
    -2     returndf = TRUE)
    +{% highlight r linenos %} +gnr(names = c("Helianthus annuus", "Homo sapiens"), data_source_ids = "12", + returndf = TRUE) +{% endhighlight %} -
    ##   data_source_id    submitted_name       name_string score title
    +
    +
    +{% highlight text %}
    +##   data_source_id    submitted_name       name_string score title
     ## 1             12 Helianthus annuus Helianthus annuus 0.988   EOL
    -## 2             12      Homo sapiens      Homo sapiens 0.988   EOL
    +## 2 12 Homo sapiens Homo sapiens 0.988 EOL +{% endhighlight %} -
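*********

### The service description quoted above also mentions fuzzy matching; a quick sketch of that (misspelling deliberate, output not shown, assuming `gnr()` takes a near-miss name the same way it takes exact ones)

{% highlight r %}
gnr(names = "Helianthus annus", returndf = TRUE)
{% endhighlight %}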

    That's it. Have fun! And put bugs/comments/etc. here.

    -
    -

    Written in Markdown, with help from knitr, and nice knitr highlighting/etc. in in RStudio.

    +### That's it. Have fun! And put bugs/comments/etc. [here](https://github.com/ropensci/taxize_/issues). -
    +********* -

    I prepared the markdown for this post by:

    +### Written in [Markdown](http://daringfireball.net/projects/markdown/), with help from [knitr](http://yihui.name/knitr/), and nice knitr highlighting/etc. in in [RStudio](http://rstudio.org/). -
     1 KnitPost <- function(input, base.url = "/") {
    - 2     require(knitr)
    - 3     opts_knit$set(base.url = base.url)
    - 4     fig.path <- paste0("img/", sub(".Rmd$", "", basename(input)), "/")
    - 5     opts_chunk$set(fig.path = fig.path)
    - 6     opts_chunk$set(fig.cap = "center")
    - 7     render_jekyll()
    - 8     knit(input, envir = parent.frame())
    - 9 }
    -10 setwd("/path/to/_posts")
    -11 KnitPost("/path/to/postfile.Rmd")
    +********* -

    from jfisher.

    +### I prepared the markdown for this post by: -
    - -
    -

    - - Recent R packages for ecology and evolution - -

    +{% highlight r linenos %} +KnitPost <- function(input, base.url = "/") { + require(knitr) + opts_knit$set(base.url = base.url) + fig.path <- paste0("img/", sub(".Rmd$", "", basename(input)), "/") + opts_chunk$set(fig.path = fig.path) + opts_chunk$set(fig.cap = "center") + render_jekyll() + knit(input, envir = parent.frame()) +} +setwd("/path/to/_posts") +KnitPost("/path/to/postfile.Rmd") +{% endhighlight %} - - -

    Many R packages/tools have come out recently for doing ecology and evolution. All of the below were described in Methods in Ecology and Evolution, except for spider, which came out in Molecular Ecology Resources. Here are some highlights.

    - -
      -
    • mvabund paper - get R pkg - -
        -
      • Model-based analysis of multivariate abundance data. Visualising data, fitting predictive models, checking assumptions, hypothesis testing.
      • -
    • -
    • popdemo paper - get R pkg - -
        -
      • Population demography using projection matrix analysis.
      • -
    • -
    • motmot paper - get R pkg - -
        -
      • Models of trait macroevolution on trees
      • -
    • -
    • spider paper - get R pkg - -
        -
      • Analysis of species identity and evolution, with particular reference to DNA barcoding
      • -
    • -
    • BaSTA paper - get R pkg - -
        -
      • Bayesian estimation of age-specific survival from incomplete mark–recapture/recovery data with covariates
      • -
    • -
    • abc paper - get R pkg - -
        -
      • Approximate Bayesian Computation (ABC)
      • -
    • -
    • RNetLogo paper - get R pkg - -
        -
      • Running and exploring individual-based models implemented in NetLogo
      • -
    • -
    • phytools paper - get R pkg - -
        -
      • Tools for phylogenetic comparative biology
      • -
    • -
    • smatr paper - get R pkg - -
        -
      • Estimation and inference about allometric lines
      • -
    • -
    • RBrownie paper - get R pkg ? - -
        -
      • Testing hypotheses about rates of evolutionary change
      • -
    • -
    • polytomy resolver paper - get R pkg - -
        -
      • Resolve polytomies on dated phylogenies with their R scripts [here][].
      • -
    • -
    - -

    And a cool tool came out for the Python programming language.

    - -
      -
    • NichePy paper - get python - -
        -
      • Modular tools for estimating the similarity of ecological niche and species distribution models
      • -
    • -
    +from [jfisher](http://jfisher-usgs.github.com/r/2012/07/03/knitr-jekyll/).
    diff --git a/_site/page31/index.html b/_site/page31/index.html index ef3ac79c77..3cf53feae5 100644 --- a/_site/page31/index.html +++ b/_site/page31/index.html @@ -59,6 +59,61 @@

    Recology

      +
    +

    + + Recent R packages for ecology and evolution + +

    + + + + Many R packages/tools have come out recently for doing ecology and evolution. All of the below were described in Methods in Ecology and Evolution, except for spider, which came out in [Molecular Ecology Resources][mer]. Here are some highlights. + ++ mvabund [paper][mvabund] - [get R pkg](http://cran.r-project.org/web/packages/mvabund/index.html) + + Model-based analysis of multivariate abundance data. Visualising data, fitting predictive models, checking assumptions, hypothesis testing. ++ popdemo [paper][popdemo] - [get R pkg](http://cran.r-project.org/web/packages/popdemo/index.html) + + Population demography using projection matrix analysis. ++ motmot [paper][motmot] - [get R pkg](http://cran.r-project.org/web/packages/motmot/index.html) + + Models of trait macroevolution on trees ++ spider [paper][spider] - [get R pkg](http://cran.r-project.org/web/packages/spider/index.html) + + Analysis of species identity and evolution, with particular reference to DNA barcoding ++ BaSTA [paper][BaSTA] - [get R pkg](http://cran.r-project.org/web/packages/BaSTA/index.html) + + Bayesian estimation of age-specific survival from incomplete mark–recapture/recovery data with covariates ++ abc [paper][abc] - [get R pkg](http://cran.r-project.org/web/packages/abc/index.html) + + Approximate Bayesian Computation (ABC) ++ RNetLogo [paper][RNetLogo] - [get R pkg](http://cran.r-project.org/web/packages/RNetLogo/index.html) + + Running and exploring individual-based models implemented in NetLogo ++ phytools [paper][phytools] - [get R pkg](http://cran.r-project.org/web/packages/phytools/index.html) + + Tools for phylogenetic comparative biology ++ smatr [paper][smatr] - [get R pkg](http://cran.r-project.org/web/packages/smatr/index.html) + + Estimation and inference about allometric lines ++ RBrownie [paper][brown] - [get R pkg ?](http://www.brianomeara.info/tutorials/brownie) + + Testing hypotheses about rates of evolutionary change ++ polytomy resolver [paper][poly] - [get R pkg](http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00103.x/suppinfo) + + Resolve polytomies on dated phylogenies with their R scripts [here][]. + +And a cool tool came out for the Python programming language. 
+ ++ NichePy [paper][NichePy] - [get python](https://github.com/bastodian/NichePy) + + Modular tools for estimating the similarity of ecological niche and species distribution models + +[mvabund]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2012.00190.x/abstract +[popdemo]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2012.00222.x/abstract +[motmot]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00132.x/abstract +[spider]: http://onlinelibrary.wiley.com/doi/10.1111/j.1755-0998.2011.03108.x/abstract?deniedAccessCustomisedMessage=&userIsAuthenticated=false +[BaSTA]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2012.00186.x/abstract +[abc]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00179.x/abstract +[RNetLogo]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00180.x/abstract +[phytools]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00169.x/abstract +[smatr]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00153.x/abstract +[brown]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00112.x/abstract +[poly]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00103.x/abstract +[NichePy]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00184.x/abstract +[mer]: http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1755-0998 + +
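If you want to grab the CRAN-hosted packages above in one go, something like this should do it (names as listed above; what is on CRAN can of course change):

{% highlight r %}
install.packages(c("mvabund", "popdemo", "motmot", "spider", "BaSTA",
                   "abc", "RNetLogo", "phytools", "smatr"))
{% endhighlight %}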
    +

    @@ -68,79 +123,68 @@

    -

    So OpenCPU is pretty awesome. You can run R in a browser using URL calls with an alphanumeric code (e.g., x3e50ee0780) defining a stored function, and any arguments you pass to it.

    - -

    Go here to store a function. And you can output lots of different types of things: png, pdf, json, etc - see here.

    - -

    Here's a function I created:

    + So [OpenCPU][] is pretty awesome. You can run R in a browser using URL calls with an alphanumeric code (e.g., x3e50ee0780) defining a stored function, and any arguments you pass to it. - +Go [here][] to store a function. And you can output lots of different types of things: png, pdf, json, etc - see [here][here2]. -

    It makes a ggplot2 graphic of your watchers and forks on each repo (up to 100 repos), sorted by descending number of forks/watchers.

    -

    Here's an example from the function. Paste the following in to your browser and you should get the below figure.

    +Here's a function I created: -

    http://beta.opencpu.org/R/call/opencpu.demo/gitstats/png

    + -

    had

    +It makes a [ggplot2][] graphic of your watchers and forks on each repo (up to 100 repos), sorted by descending number of forks/watchers. -

    And you can specify user or organization name using arguments in the URL

    +Here's an example from the function. Paste the following in to your browser and you should get the below figure. -

    http://beta.opencpu.org/R/call/opencpu.demo/gitstats/png?type='org'&id='ropensci'

    +http://beta.opencpu.org/R/call/opencpu.demo/gitstats/png -

    ropensci

    +![had](/public/img/hadley.png) -

    Sweet. Have fun.

    -

    - -
    -

    - - mvabund - new R pkg for multivariate abundance data - -

    - +And you can specify user or organization name using arguments in the URL -

    There is a new R package in town, mvabund, which does, as they say "statistical methods for analysing multivariate abundance data". The authors introduced the paper in an online early paper in Methods in Ecology and Evolution here, R package here.

    +http://beta.opencpu.org/R/call/opencpu.demo/gitstats/png?type='org'&id='ropensci' -

    The package is meant to visualize data, fit predictive models, check model assumptions, and test hypotheses about community-environment associations.

    +![ropensci](/public/img/ropensci.png) -

    Here is a quick example.

    - -

    mvabund1

    +Sweet. Have fun. -

    mvabund2

    +[ggplot2]: http://had.co.nz/ggplot2/ +[OpenCPU]: http://opencpu.org/ +[here]: http://beta.opencpu.org/apps/opencpu.demo/storefunction/ +[http://beta.opencpu.org/R/call/store:tmp/x3e50ee0780/png]: http://beta.opencpu.org/R/call/store:tmp/x3e50ee0780/png +[http://beta.opencpu.org/R/call/store:tmp/x3e50ee0780/png?id='ropensci'&type='org']: http://beta.opencpu.org/R/call/store:tmp/x3e50ee0780/png?id='ropensci'&type='org' +[here2]: http://opencpu.org/documentation/outputs/

    - - Journal Articles Need Interactive Graphics + + mvabund - new R pkg for multivariate abundance data

    - + -

    I should have thought of it earlier: In a day and age when we are increasingly reading scientific literature on computer screens, why is it that we limit our peer-reviewed data representation to static, unchanging graphs and plots? Why do we not try to create dynamic visualizations of our rich and varied data sets? Would we not derive benefits in the quality and clarity of scientific discourse from publishing these visualizations?

    + There is a new R package in town, mvabund, which does, as they say "statistical methods for analysing multivariate abundance data". The authors introduced the paper in an online early paper in Methods in Ecology and Evolution [here][], R package [here][here2]. -

    An article in the very good (and under-appreciated, in my opinion) American Scientist magazine written by Brian Hayes started me thinking about these questions. "Pixels or Perish" begins by recapping the evolution of graphics in scientific publications and notes that before people were good at making plots digitally, they were good at making figures from using photographic techniques; and before that, from elaborate engravings. Clearly, the state-of-the-art in scientific publishing is a moving target.

    +The package is meant to visualize data, fit predictive models, check model assumptions, and test hypotheses about community-environment associations. -

    Hayes points out that one of the primary advantages of static images is that everyone knows how to use them and that almost no one lacks the tools to view them. That is, printed images in a magazine or static digital images in the portable document format (pdf) are easily viewed on paper or on a screen and can be readily interpreted by a wide audience. While I agree that this feature is very important, why have we not, as scientists, moved to the next level? We do not lack the ability to interpret data--it is our job to do so--not to mention that we are some of the heaviest generators of data in the first place.

    +Here is a quick example. -

    The obstacles to progress towards interactive data are two-fold. First, generating dynamic data visualizations is not as easy as generating static plots. The data visualization tools simply are not as well developed and they do not show up as frequently in the programming environments in which scientists work. One example Hayes cites is that the ideas from programs such as D^3 have not yet made an appearance in software, like R and Matlab, that more scientists use. This is one reason why I am so excited by the work that our very own Scott has been doing with this Recology blog, in trying to promote awareness of tools in R.

    + -

    The second is that neither of our currently dominant publishing formats (physical paper and digital pdf files) support dynamic graphics. Hayes says it better than I could: "…the Web is not where scientists publish…[publications are]…available through the Web, not on the Web." So, not many current publications really take advantage of the new capabilities that the Web has offered us to showcase dynamic data sets. In fact, while Science and Nature--just to name two prominent examples of scientific journals--make available HTML versions of their articles, it seems like most of the interactivity is limited to looking at larger versions of figures in the articles*. I myself usually just download the pdf version of articles rather than viewing the HTML version. This obstacle, however, is not a fundamental one; it is only the current situation.

    +![mvabund1](/public/img/mvabund1.png) -

    The more serious obstacle that Hayes foresees in transitioning to dynamic graphics is one of archiving. Figures in journal articles printed in 1900 are still readable today, but there is no guarantee that a particular file format will survive in usable form to 2100, or even 2020. I do not know the answer to this conundrum. A balance might need to be struck between generating static and dynamic data. At least in the medium term, papers should probably also contain static versions of figures representing dynamic data sets. It is inelegant, but it could avoid the situation where we lose access to information that was once there.

    +![mvabund2](/public/img/mvabund2.png) -

    That said, if the New York Times can do it, so can we. We should not wait to make our data presentation more dynamic and interactive. At first, it will be difficult to incorporate these kinds of figures into the articles themselves, and they will likely be relegated to the "supplemental material" dead zone that is infrequently viewed. But the more dynamic material that journals receive from authors, the more incentive they will have to expand upon their current offerings. Ultimately, doing so will greatly improve the quality of scientific discourse.

    -

    * Whether the lack of dynamic data visualization on these journals' websites is due to the authors not submitting such material or due to restrictions from the journals themselves, I do not know. I suspect the burden falls more on the authors' shoulders at this point than the journals'.

    +[here]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2012.00190.x/full +[here2]: http://cran.r-project.org/web/packages/mvabund/index.html
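For anyone wanting to try something like the quick mvabund example shown in the figures above, here is a hedged sketch using the spider dataset that ships with the package (argument names follow the package docs; check `?manyglm` for the current interface):

{% highlight r %}
library(mvabund)
data(spider)
abund <- mvabund(spider$abund)   # turn the abundance matrix into an mvabund object
plot(abund)                      # quick visual overview of the community data
fit <- manyglm(abund ~ spider$x$soil.dry, family = "negative.binomial")
plot(fit)                        # residual plot as a rough assumption check
{% endhighlight %}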
    diff --git a/_site/page32/index.html b/_site/page32/index.html index 77630a7117..7cbfd0d1f3 100644 --- a/_site/page32/index.html +++ b/_site/page32/index.html @@ -59,6 +59,44 @@

    Recology

      +
    +

    + + Journal Articles Need Interactive Graphics + +

    + + + + I should have thought of it earlier: In a day and age when we are increasingly reading scientific literature on computer screens, why is it that we limit our peer-reviewed data representation to static, unchanging graphs and plots? Why do we not try to create dynamic visualizations of our rich and varied data sets? Would we not derive benefits in the quality and clarity of scientific discourse from publishing these visualizations? + +An article in the very good (and under-appreciated, in my opinion) *[American Scientist][]* magazine written by Brian Hayes started me thinking about these questions. ["Pixels or Perish"][PorP] begins by recapping the evolution of graphics in scientific publications and notes that before people were good at making plots digitally, they were good at making figures from using photographic techniques; and before that, from elaborate engravings. Clearly, the state-of-the-art in scientific publishing is a moving target. + +Hayes points out that one of the primary advantages of static images is that everyone knows how to use them and that almost no one lacks the tools to view them. That is, printed images in a magazine or static digital images in the portable document format (pdf) are easily viewed on paper or on a screen and can be readily interpreted by a wide audience. While I agree that this feature is very important, why have we not, as scientists, moved to the next level? We do not lack the ability to interpret data--it is our job to do so--not to mention that we are some of the heaviest generators of data in the first place. + +The obstacles to progress towards interactive data are two-fold. First, generating dynamic data visualizations is not as easy as generating static plots. The data visualization tools simply are not as well developed and they do not show up as frequently in the programming environments in which scientists work. One example Hayes cites is that the ideas from programs such as [D^3][dthree] have not yet made an appearance in software, like [R][] and [Matlab][], that more scientists use. This is one reason why I am so excited by the work that our very own [Scott][] has been doing with this [Recology][] blog, in trying to promote awareness of tools in [R][]. + +The second is that neither of our currently dominant publishing formats (physical paper and digital pdf files) support dynamic graphics. Hayes says it better than I could: "…the Web is not where scientists publish…\[publications are\]…available *through* the Web, not *on* the Web." So, not many current publications really take advantage of the new capabilities that the Web has offered us to showcase dynamic data sets. In fact, while [Science][] and [Nature][]--just to name two prominent examples of scientific journals--make available HTML versions of their articles, it seems like most of the interactivity is limited to looking at larger versions of figures in the articles\*. I myself usually just download the pdf version of articles rather than viewing the HTML version. This obstacle, however, is not a fundamental one; it is only the current situation. + +The more serious obstacle that Hayes foresees in transitioning to dynamic graphics is one of archiving. Figures in journal articles printed in 1900 are still readable today, but there is no guarantee that a particular file format will survive in usable form to 2100, or even 2020. I do not know the answer to this conundrum. A balance might need to be struck between generating static and dynamic data. 
At least in the medium term, papers should probably also contain static versions of figures representing dynamic data sets. It is inelegant, but it could avoid the situation where we lose access to information that was once there. + +That said, if the [New York Times][nytimes] can do it, so can we. We should not wait to make our data presentation more dynamic and interactive. At first, it will be difficult to incorporate these kinds of figures into the articles themselves, and they will likely be relegated to the "supplemental material" dead zone that is infrequently viewed. But the more dynamic material that journals receive from authors, the more incentive they will have to expand upon their current offerings. Ultimately, doing so will greatly improve the quality of scientific discourse. + +[nytimes]: http://www.nytimes.com +[American Scientist]: http://www.americanscientist.org/ +[PorP]: http://www.americanscientist.org/issues/pub/pixels-or-perish +[dthree]: http://vis.stanford.edu/files/2011-D3-InfoVis.pdf +[R]: http://www.r-project.org/ +[Matlab]: http://www.mathworks.com/products/matlab/ +[Scott]: http://sckott.github.io/recologyabout.html +[Science]: http://www.sciencemag.org/ +[Nature]: http://wwww.nature.com +[Recology]: http://sckott.github.io/ + +* Whether the lack of dynamic data visualization on these journals' websites is due to the authors not submitting such material or due to restrictions from the journals themselves, I do not know. I suspect the burden falls more on the authors' shoulders at this point than the journals'. + +
    +

    @@ -68,11 +106,15 @@

    -

    Many ecologists are R users, but we vary in our understanding of the math and statistical theory behind models we use. There is no clear consensus on what should be the basic mathematical training of ecologists.

    + Many ecologists are R users, but we vary in our understanding of the math and statistical theory behind models we use. There is no clear consensus on what should be the basic mathematical training of ecologists. + +To learn what the community thinks, we invite you to fill out a short and anonymous questionnaire on this topic [here][]. -

    To learn what the community thinks, we invite you to fill out a short and anonymous questionnaire on this topic here.

    +The questionnaire was designed by [Frédéric Barraquand][fred], a graduate student at Université Pierre et Marie Curie, in collaboration with the International Network of Next-Generation Ecologists ([INNGE][]). -

    The questionnaire was designed by Frédéric Barraquand, a graduate student at Université Pierre et Marie Curie, in collaboration with the International Network of Next-Generation Ecologists (INNGE).

    +[here]: https://sites.google.com/site/mathematicsandecologysurvey/ +[fred]: http://www.cebc.cnrs.fr/Fidentite/barraquand/barraquand.htm +[INNGE]: http://www.innge.net

    @@ -85,46 +127,36 @@

    -

    So Flora of North America is an awesome collection of taxonomic information for plants across the continent. However, the information within is not easily machine readable.

    + So [Flora of North America][fna] is an awesome collection of taxonomic information for plants across the continent. However, the information within is not easily machine readable. -

    So, a little web scraping is called for.

    +So, a little web scraping is called for. -

    rfna is an R package to collect information from the Flora of North America.

    +[rfna][] is an R package to collect information from the Flora of North America. -

    So far, you can: +So far, you can: 1. Get taxonomic names from web pages that index the names. 2. Then get daughter URLs for those taxa, which then have their own 2nd order daughter URLs you can scrape, or scrape the 1st order daughter page. -3. Query Asteraceae taxa for whether they have paleate or epaleate receptacles. This function is something I needed, but more functions will be made like this to get specific traits.

    +3. Query Asteraceae taxa for whether they have paleate or epaleate receptacles. This function is something I needed, but more functions will be made like this to get specific traits. -

    Further functions will do search, etc.

    +Further functions will do search, etc. -

    You can install by:

    +You can install by: -
    install.packages("devtools")
    -require(devtools)
    -install_github("rfna", "rOpenSci")
    -require(rfna)
    - -

    Here is an example where a set of URLs is acquired using function getdaughterURLs, then the function receptacle is used to ask whether of each the taxa at those URLs have paleate or epaleate receptacles.

    - - +{% highlight r %} +install.packages("devtools") +require(devtools) +install_github("rfna", "rOpenSci") +require(rfna) +{% endhighlight %} -

    - -
    -

    - - RNetLogo - A package for running NetLogo from R - -

    +Here is an example where a set of URLs is acquired using function ```getdaughterURLs```, then the function ```receptacle``` is used to ask whether of each the taxa at those URLs have paleate or epaleate receptacles. - -

    Described in a new Methods in Ecology and Evolution paper here, a new R package RNetLogo allows you to use NetLogo from R.

    + -

    NetLogo is software is a "multi-agent programmable modeling environment". NetLogo can be used in individual- and agent-based modeling, and is used in the book Agent-based and Individual-based Modeling: A Practical Introduction by Railsback & Grimm.

    -

    I have not tried the package yet, but looks interesting. I am always a fan of running stand-alone programs from R if possible.

    +[fna]: http://fna.huh.harvard.edu/ +[rfna]: https://github.com/ropensci/rfna
    diff --git a/_site/page33/index.html b/_site/page33/index.html index 6c31f01a43..f63b6327e3 100644 --- a/_site/page33/index.html +++ b/_site/page33/index.html @@ -61,149 +61,145 @@

    Recology

    - - Taking a Closer Look at Peer Review + + RNetLogo - A package for running NetLogo from R

    - - -

    This post is only tangentially about open science. It is more directly about the process of peer review and how it might be improved. I am working on a follow-up post about how these points can be addressed in an open publishing environment.

    - -

    A recent paper on the arXiv got me thinking about the sticking points in the publishing pipeline. As it stands, most scientists have a pretty good understanding of how peer reviewed publishing is supposed to work. Once an author—or more likely, a group of authors—decides that a manuscript is ready for action, the following series of events will occur:

    - -
      -
    1. the authors submit the manuscript to the journal of choice;
    2. -
    3. the journal's editor makes a general decision about whether the article is appropriate for the journal;
    4. -
    5. in the affirmative case, the editor selects referees for the manuscript and sends them the text for review;
    6. -
    7. the referees return reviews of the manuscript (the referees are not typically identified to the authors);
    8. -
    9. the editor makes the decision to reject the manuscript, accept it with minor revisions, or accept it with major revisions. Rejected manuscripts usually start over the process in another journal. Minor revisions to accepted manuscripts are usually made quickly and publication proceeds. In the case of major revisions, the suggested changes are made, if possible, and the manuscript is returned to the editor. At this point, the referees may get a second crack at the material (but not necessarily), before the editor makes a final accept/reject decision based on the feedback from the referees.
    10. -
    - -

    Peer review of manuscripts exists for several reasons. For one, self-regulation determines the suitability of the material for publication if it was not already obvious to the editor of the journal. Having peer reviewers also improves the material and its presentation. Furthermore, having expert reviewers lends credibility to the work and insures that misleading, wrong, or crackpot material does not receive the stamp of credibility. Finally, finding appropriately skilled referees spreads the workload beyond the editors, who may not have the resources to evaluate every paper arriving at their desk.

    - -

    Though peer review has a storied history, it also has its drawbacks. First, and perhaps foremost, the process is often a slow one, with many months elapsing during even one round of communications between the authors, the editor, and the referees. Peer review is not always an objective process either: referees have the power to delay, or outright reject, work that their competitors have completed, and thus they may lose their impartiality in the process. Additionally, the publishing process does not reveal the feedback process that occurs between authors and referees, which can be a scientifically and pedagogically valuable exchange.

    + -

    One proposal to address the shortcomings of the peer review process (alluded to in the first paragraph) was posted by Sergey Bozhevolnyi on the arXiv, a pre-publication website for many physics-related manuscripts. Bozhevolnyi calls his model of publishing Rapid, Impartial, and Comprehensive (RIC) publishing. To him, "rapid" means that editors should approve or reject manuscripts before the manuscripts are sent to the referees for review. Then, "impartial" means that referees, who might otherwise have an interest in rejecting a perfectly fine paper, lose the power to dictate whether or not a manuscript is published. Instead, the referees critique the paper without assessing whether it is publication-worthy. Lastly, "comprehensive" involves publishing everything having to do with the manuscript. That is, all positive and negative reviews are published in conjunction with the all versions of a manuscript.

    + Described in a new Methods in Ecology and Evolution paper [here][], a new [R][] package [RNetLogo][] allows you to use [NetLogo][] from R. -

    The primary benefit of RIC, according to Bozhevolnyi, is that it saves the energies of authors, editors, and referees, thus allowing them all to do more research and less wrangling. Since most papers are ultimately accepted somewhere, then we should not cause additional delays in publishing by first rejecting them in multiple places. Instead, collate the manuscript and the reviews and publish them all together, along with any revisions to the manuscript. Having the reviews be publicly viewable will encourage referees to be more careful about writing their critiques and supporting their assertions, and the process as a whole will be more transparent than it currently is.

    +NetLogo is a "multi-agent programmable modeling environment". NetLogo can be used in individual- and agent-based modeling, and is used in the book [_Agent-based and Individual-based Modeling: A Practical Introduction_][book] by Railsback & Grimm.

    Before I critique the RIC publishing proposal, I should point out that some aspects of the proposal are very appealing. I particularly like the idea of publishing all reviews in addition to the manuscript. That said, I find it difficult to believe that the incentives for authors and referees change for the better under this proposal. For example, what happens if authors receive feedback, do not wish to invest the time to address the critique, and subsequently allow the original manuscript and the reviews to stand as they are? This situation seems like a moral hazard for authors that does a disservice to the quality of scientific literature. On the part of the referees, does removing decision-making authority make reviewing less appealing? Disempowering the referees by potentially ignoring their critique and only counting it as a minor part of the publishing process will not motivate them to write better reviews. In the case of editors, what makes us believe that an editor, or an editorial board, has the background to properly evaluate the suitability of work for acceptance into a journal? The reason we have referees in the current peer review system is because they have the very expertise and familiarity needed for this task.

    +I have not tried the package yet, but looks interesting. I am always a fan of running stand-alone programs from R if possible. -

    Does the fact that Bozhevolnyi's RIC proposal does not make sense mean that peer review is fine as it is? I do not think so. Instead, it is worth asking what parts of peer review we like and what parts we would like to improve. I posit that rejection, or the threat of rejection, is the greatest motivator for authors to make necessary changes to their manuscript. As such, rejection by peers is still the best way to require and receive revisions. Though I think that referees should retain their rejecting power (and their anonymity!), I feel strongly that the entire peer review process would benefit from the increased transparency and accountability that publishing unsigned reviews would add. As far as editors, they play a role in shaping the kind of journal they run by selecting appropriate material on a general level, but they should not play too large a role in determining the "important" research in any field. The model used by the journal [Public Library of Science One][] is a promising one in this regard, with the only acceptance criterion being whether the science is sound.

    - -

    The amount of time that it takes to publish is one of the most frustrating aspects of peer review, however. Journals could voluntarily publish time-to-publication figures, a number which could then be used by authors—along with impact factors and acceptance rates—to decide which journals to submit to. For instance, an editor of the Journal of Orthodontics writes about just this fact in an editorial. A Google search for "journal time to publication" reveals that people have been thinking about this problem for a while (e.g. computer science comparisons), but no general standard exists across journals. In fact, I suspect these are numbers most journals are afraid will hurt them more than help them. Nevertheless, journals acknowledge the demand for rapid publication when they offer services like [Springer's Fast Track publishingfasttrackpub or Physical Review's Rapid Communications.

    - -

    Ultimately, it may not matter what journals do because authors are routing around this problem via pre-publication archives such as the arXiv for physics-related subject matter. Though not without complications, especially in the health sciences (see, for example, "The Promise and Perils of Pre-Publication Review"), pre-publication allows authors to communicate results and establish priority without stressing about getting through the peer review process as fast as possible. Instead, the process takes its normal, slower course while authors move along their on-going research.

    - -

    I will conclude by leaving an open question that I may address in a future post: how do you encourage peer reviewers to do the best possible job, in a timely manner, without only relying on their altruism to doing good science and being good members of the community? It is this question about peer review, I feel, that is the most fraught with complication and subject to the law of unintended consequences if the incentives are changed.

    +[RNetLogo]: http://cran.r-project.org/web/packages/RNetLogo/index.html +[NetLogo]: http://ccl.northwestern.edu/netlogo/ +[here]: http://onlinelibrary.wiley.com/doi/10.1111/j.2041-210X.2011.00180.x/abstract +[R]: http://cran.r-project.org/ +[book]: http://www.railsback-grimm-abm-book.com/
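Since I have not run RNetLogo myself, the following is only a sketch pieced together from the package documentation -- the paths are placeholders, Fire.nlogo is just an example model, and you should double-check the arguments in the manual:

{% highlight r %}
library(RNetLogo)
NLStart("/path/to/NetLogo")                  # start a NetLogo session from R
NLLoadModel("/path/to/models/Fire.nlogo")    # load a model
NLCommand("setup")                           # run the model's setup procedure
NLDoCommand(50, "go")                        # step the model 50 times
NLReport("count turtles")                    # pull a reporter value back into R
NLQuit()
{% endhighlight %}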

    - - Function for phylogeny resolution + + Taking a Closer Look at Peer Review

    - - -

UPDATE: Yeah, so the treeresstats function had a problem in one of the calculations. I fixed that and added some more calculations to the function.

    + -

    I couldn't find any functions to calculate number of polytomies, and related metrics.

    + This post is only tangentially about open science. It is more directly about the process of peer review and how it might be improved. I am working on a follow-up post about how these points can be addressed in an open publishing environment. -

    Here's a simple function that gives four metrics on a phylo tree object:
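(The gist itself doesn't render in this archived page, so here is a minimal sketch, assuming the ape package and not the original treeresstats code, of how metrics like the ones below can be computed.)

{% highlight r %}
# Minimal sketch (not the original gist): polytomy metrics on an ape "phylo" object
library(ape)

treeres_sketch <- function(tree) {
  ntips <- Ntip(tree)
  nnodes <- Nnode(tree)
  kids <- table(tree$edge[, 1])   # number of children per internal node
  npolys <- sum(kids > 2)         # nodes with more than 2 children are polytomies
  list(
    trsize_tips = ntips,
    trsize_nodes = nnodes,
    numpolys = npolys,
    numpolysbytrsize_tips = npolys / ntips,
    numpolysbytrsize_nodes = npolys / nnodes,
    propnodesdich = sum(kids == 2) / nnodes
  )
}

# example: a random tree with short branches collapsed into polytomies
set.seed(1)
treeres_sketch(di2multi(rtree(15), tol = 0.3))
{% endhighlight %}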

    +A [recent paper on the arXiv][arXivpaper] got me thinking about the sticking points in the publishing pipeline. As it stands, most scientists have a pretty good understanding of how peer reviewed publishing is supposed to work. Once an author—or more likely, a group of authors—decides that a manuscript is ready for action, the following series of events will occur: - +1. the authors submit the manuscript to the journal of choice; +2. the journal's editor makes a general decision about whether the article is appropriate for the journal; +3. in the affirmative case, the editor selects referees for the manuscript and sends them the text for review; +4. the referees return reviews of the manuscript (the referees are not typically identified to the authors); +5. the editor makes the decision to reject the manuscript, accept it with minor revisions, or accept it with major revisions. Rejected manuscripts usually start over the process in another journal. Minor revisions to accepted manuscripts are usually made quickly and publication proceeds. In the case of major revisions, the suggested changes are made, if possible, and the manuscript is returned to the editor. At this point, the referees may get a second crack at the material (but not necessarily), before the editor makes a final accept/reject decision based on the feedback from the referees. -

    Here's output from the gist above:

+Peer review of manuscripts exists for several reasons. For one, self-regulation determines the suitability of the material for publication if it was not already obvious to the editor of the journal. Having peer reviewers also improves the material and its presentation. Furthermore, having expert reviewers lends credibility to the work and ensures that misleading, wrong, or crackpot material does not receive the stamp of credibility. Finally, finding appropriately skilled referees spreads the workload beyond the editors, who may not have the resources to evaluate every paper arriving at their desk. -
    $trsize_tips
    -[1] 15
    +Though peer review has a storied history, it also has its drawbacks.  First, and perhaps foremost, the process is often a slow one, with many months elapsing during even one round of communications between the authors, the editor, and the referees.  Peer review is not always an objective process either: referees have the power to delay, or outright reject, work that their competitors have completed, and thus they may lose their impartiality in the process.  Additionally, the publishing process does not reveal the feedback process that occurs between authors and referees, which can be a scientifically and pedagogically valuable exchange.
     
    -$trsize_nodes
    -[1] 13
+[One proposal][arXivpaper] to address the shortcomings of the peer review process (alluded to in the first paragraph) was posted by Sergey Bozhevolnyi on the [arXiv][arXiv], a pre-publication website for many physics-related manuscripts.  Bozhevolnyi calls his model of publishing Rapid, Impartial, and Comprehensive (RIC) publishing.  To him, "rapid" means that editors should approve or reject manuscripts before the manuscripts are sent to the referees for review.  Then, "impartial" means that referees, who might otherwise have an interest in rejecting a perfectly fine paper, lose the power to dictate whether or not a manuscript is published.  Instead, the referees critique the paper without assessing whether it is publication-worthy.  Lastly, "comprehensive" involves publishing everything having to do with the manuscript.  That is, all positive and negative reviews are published in conjunction with all versions of a manuscript.
     
    -$numpolys
    -[1] 1
    +The primary benefit of RIC, according to Bozhevolnyi, is that it saves the energies of authors, editors, and referees, thus allowing them all to do more research and less wrangling.  Since most papers are ultimately accepted somewhere, then we should not cause additional delays in publishing by first rejecting them in multiple places.  Instead, collate the manuscript and the reviews and publish them all together, along with any revisions to the manuscript.  Having the reviews be publicly viewable will encourage referees to be more careful about writing their critiques and supporting their assertions, and the process as a whole will be more transparent than it currently is.
     
    -$numpolysbytrsize_tips
    -[1] 0.06666667
    +Before I critique the RIC publishing proposal, I should point out that some aspects of the proposal are very appealing.  I particularly like the idea of publishing all reviews in addition to the manuscript.  That said, I find it difficult to believe that the incentives for authors and referees change for the better under this proposal.  For example, what happens if authors receive feedback, do not wish to invest the time to address the critique, and subsequently allow the original manuscript and the reviews to stand as they are?  This situation seems like a moral hazard for authors that does a disservice to the quality of scientific literature.  On the part of the referees, does removing decision-making authority make reviewing less appealing?  Disempowering the referees by potentially ignoring their critique and only counting it as a minor part of the publishing process will not motivate them to write better reviews.  In the case of editors, what makes us believe that an editor, or an editorial board, has the background to properly evaluate the suitability of work for acceptance into a journal?  The reason we have referees in the current peer review system is because they have the very expertise and familiarity needed for this task.
     
    -$numpolysbytrsize_nodes
    -[1] 0.07692308
    +Does the fact that Bozhevolnyi's RIC proposal does not make sense mean that peer review is fine as it is?  I do not think so.  Instead, it is worth asking what parts of peer review we like and what parts we would like to improve.  I posit that rejection, or the threat of rejection, is the greatest motivator for authors to make necessary changes to their manuscript.  As such, rejection by peers is still the best way to require and receive revisions.  Though I think that referees should retain their rejecting power (and their anonymity!), I feel strongly that the entire peer review process would benefit from the increased transparency and accountability that publishing unsigned reviews would add.  As far as editors, they play a role in shaping the kind of journal they run by selecting appropriate material on a general level, but they should not play too large a role in determining the "important" research in any field.  The model used by the journal [Public Library of Science One][] is a promising one in this regard, with the only acceptance criterion being whether the science is sound.
     
    -$proptipsdescpoly
    -[1] 0.2
+The amount of time that it takes to publish is one of the most frustrating aspects of peer review, however.  Journals could voluntarily publish time-to-publication figures, a number which could then be used by authors—along with impact factors and acceptance rates—to decide which journals to submit to.  For instance, an editor of the Journal of Orthodontics writes about just this fact in [an editorial][orthodonticseditorial].  A Google search for "journal time to publication" reveals that people have been thinking about this problem for a while (e.g. [computer science comparisons][compscicomps]), but no general standard exists across journals.  In fact, I suspect these are numbers most journals are afraid will hurt them more than help them.  Nevertheless, journals acknowledge the demand for rapid publication when they offer services like [Springer's Fast Track publishing][fasttrackpub] or [Physical Review's Rapid Communications][rapidcomm].
     
    -$propnodesdich
    -[1] 0.9230769
    +Ultimately, it may not matter what journals do because authors are routing around this problem via pre-publication archives such as the [arXiv][arXiv] for physics-related subject matter. Though not without complications, especially in the health sciences (see, for example, ["The Promise and Perils of Pre-Publication Review"][prepubperils]), pre-publication allows authors to communicate results and establish priority without stressing about getting through the peer review process as fast as possible. Instead, the process takes its normal, slower course while authors move along their on-going research. -

    And an example with many trees:

+I will conclude by leaving an open question that I may address in a future post: how do you encourage peer reviewers to do the best possible job, in a timely manner, without relying only on their altruism toward doing good science and being good members of the community? It is this question about peer review, I feel, that is the most fraught with complication and subject to the law of unintended consequences if the incentives are changed. - - - - - - - - - - - - - - - - - - - - - - - - - -
| trsize_tips | trsize_nodes | numpolys | numpolysbytrsize_tips | numpolysbytrsize_nodes | proptipsdescpoly | propnodesdich |
|---|---|---|---|---|---|---|
| 20 | 13 | 4 | 0.20 | 0.31 | 0.7 | 0.69 |
| 20 | 7 | 3 | 0.15 | 0.43 | 0.9 | 0.57 |
| 20 | 11 | 6 | 0.30 | 0.55 | 1.0 | 0.45 |
| 20 | 13 | 4 | 0.20 | 0.31 | 0.7 | 0.69 |
| 20 | 9 | 5 | 0.25 | 0.56 | 1.0 | 0.44 |
    +[arXivpaper]: http://arxiv.org/abs/1110.0791 +[arXiv]: http://arxiv.org/ +[PLOSone]: http://www.plosone.org/static/reviewerGuidelines.action#about +[orthodonticseditorial]: http://jorthod.maneyjournals.org/content/29/3/171.full +[compscicomps]: http://www.hutter1.net/journals.htm +[fasttrackpub]: http://www.springer.com/societies+%26+publishing+partners/society+%26+partner+zone?SGWID=0-173202-12-772912-0 +[rapidcomm]: http://pra.aps.org/highlighting-rapids +[prepubperils]: http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0010782

    - - Moving from blogger and wordpress to jekyll + + Function for phylogeny resolution

    - + -

Recology used to be hosted on Blogger, and my personal website was hosted on Wordpress. Neither platform was very satisfying. Blogger is very limited in their layouts, unless you use dynamic views, which suck because they don't allow javascript snippets to render GitHub gists. Wordpress is just limited all around, as you can hardly put in anything except text and some pictures. They both have their place, but not so much for content that requires syntax highlighting, references, etc.

+ UPDATE: Yeah, so the treeresstats function had a problem in one of the calculations. I fixed that and added some more calculations to the function. -

    Jekyll powered sites on GitHub are an awesome alternative. You do have to write the code yourself, but you can copy any number of templates on GitHub with a simple git clone onto your machine, edit the text a bit, push it up to GitHub, and that's it.

    +I couldn't find any functions to calculate number of polytomies, and related metrics. -

    On Blogger and Wordpress you can't see the code behind why different blogs/sites look different. But on Jekyll/GitHub you can see the code behind each site (see here for a list of Jekyll/GitHub sites and their source code), which makes learning so easy.

    +Here's a simple function that gives four metrics on a phylo tree object: -

    Here is a video on YouTube that explains in some detail Jekyll/GitHub sites:

    + + +Here's output from the gist above: + +{% highlight r %} +$trsize_tips +[1] 15 + +$trsize_nodes +[1] 13 - +$numpolys +[1] 1 -

A great point in the video above is that a Jekyll site allows a workflow that is great not only for code-junkies, but for scientists. What is the most important thing about science? That it is reproducible of course. Documenting your code and sharing with everyone on GitHub or SVN, etc. is great for science in facilitating collaboration and facilitating transparency. Having your website/blog on Jekyll fits right into this workflow (that is, pull down any changes - write/edit something - commit - push to GitHub). Although this sort of workflow isn't necessary for a blog, it is nice for scientists to use this workflow all the time.

    +$numpolysbytrsize_tips +[1] 0.06666667 -

    Here's how to get started:

    +$numpolysbytrsize_nodes +[1] 0.07692308 -
      -
1. Install git
2. Get a free GitHub account and configure GitHub. If you are afraid of the command line, there is a great GitHub app here.
3. git clone a jekyll template to your machine. There are hundreds of these now. Look here for your favorite, and git clone it. ***
4. Edit the template you have cloned, and commit and push to GitHub. That's it. It will take just a bit to render.
    +$proptipsdescpoly +[1] 0.2 -

There is more to it than that, but that is how you can get started. If you want to add comments, Disqus is a great option. Once you fork someone's jekyll site, make sure to change all the personal/site specific information to your information, including the RSS feed.

    +$propnodesdich +[1] 0.9230769 +{% endhighlight %} -

    *** Note: You can name your repo for your site/blog as yourgithubname.github.com if you want your URL for the site to be http://yourgithubname.github.com. Or you can name your repo whatever you want, e.g., disrepo, then the URL will be http://yourgithubname.github.com/disrepo.

    +And an example with many trees: + + + + + + + + + + + + + + + + + + + + + + + + + + +
| trsize_tips | trsize_nodes | numpolys | numpolysbytrsize_tips | numpolysbytrsize_nodes | proptipsdescpoly | propnodesdich |
|---|---|---|---|---|---|---|
| 20 | 13 | 4 | 0.20 | 0.31 | 0.7 | 0.69 |
| 20 | 7 | 3 | 0.15 | 0.43 | 0.9 | 0.57 |
| 20 | 11 | 6 | 0.30 | 0.55 | 1.0 | 0.45 |
| 20 | 13 | 4 | 0.20 | 0.31 | 0.7 | 0.69 |
| 20 | 9 | 5 | 0.25 | 0.56 | 1.0 | 0.44 |
    diff --git a/_site/page34/index.html b/_site/page34/index.html index 086057798e..61732d5570 100644 --- a/_site/page34/index.html +++ b/_site/page34/index.html @@ -61,93 +61,110 @@

    Recology

    - - Presenting results of logistic regression + + Moving from blogger and wordpress to jekyll

    - + -

So my advisor pointed out this 'new' (well, 2004) way of plotting the results of logistic regression. The idea was presented in a 2004 Bulletin of the Ecological Society of America issue (here). I tried to come up with a solution using, what else, ggplot2. I don't have it quite all the way down - I am missing the second y-axis values for the histograms, but someone smarter than me can figure that part out (note that Hadley doesn't want to support second y-axes in ggplot2, but they can probably be hacked on).

+ Recology used to be hosted on Blogger, and my personal website was hosted on Wordpress. Neither platform was very satisfying. Blogger is very limited in their layouts, unless you use dynamic views, which suck because they don't allow javascript snippets to render GitHub gists. Wordpress is just limited all around, as you can hardly put in anything except text and some pictures. They both have their place, but not so much for content that requires syntax highlighting, references, etc. -

    Here's the code: -
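(The gist doesn't render here; as a stand-in, here is a minimal sketch of the fitted-curve part of these figures, using current ggplot2 syntax rather than the original loghistplot/logpointplot code.)

{% highlight r %}
# Minimal sketch (not the original gist): logistic fit over the raw 0/1 data
library(ggplot2)

logpoint_sketch <- function(df) {
  names(df) <- c("x", "y")
  ggplot(df, aes(x = x, y = y)) +
    geom_point(position = position_jitter(height = 0.02), alpha = 0.5) +
    stat_smooth(method = "glm", method.args = list(family = binomial))
}

logpoint_sketch(mtcars[, c("mpg", "vs")])
{% endhighlight %}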

    +[Jekyll][] powered sites on [GitHub][] are an awesome alternative. You do have to write the code yourself, but you can copy any number of templates on GitHub with a simple `git clone` onto your machine, edit the text a bit, push it up to GitHub, and that's it. -

    Here's a few examples using datasets provided with the ggplot2 package:

    +On Blogger and Wordpress you can't see the code behind why different blogs/sites look different. But on Jekyll/GitHub you can see the code behind each site (see [here][] for a list of Jekyll/GitHub sites and their source code), which makes learning so easy. -
    loghistplot(mtcars[,c("mpg","vs")])
    +Here is a video on YouTube that explains in some detail Jekyll/GitHub sites: -

    mtcars plot

    + -
    loghistplot(movies[,c("rating","Action")])
+A great point in the video above is that a Jekyll site allows a workflow that is great not only for code-junkies, but for scientists. What is the most important thing about science? That it is reproducible of course. Documenting your code and sharing with everyone on GitHub or SVN, etc. is great for science in facilitating collaboration and facilitating transparency. Having your website/blog on Jekyll fits right into this workflow (that is, pull down any changes - write/edit something - commit - push to GitHub). Although this sort of workflow isn't necessary for a blog, it is nice for scientists to use this workflow all the time. -

    movies plot

    +Here's how to get started: -

    And two examples of the logpointplot function:

    +1. [Install git][git] +2. [Get a free GitHub account][getgithub] and [configure GitHub][configgithug]. If you are afraid of the command line, there is a great GitHub app [here][here3]. +3. `git clone` a jekyll template to your machine. There are hundreds of these now. Look [here][here2] for your favorite, and `git clone` it. *** +4. Edit the template you have cloned, and commit and push to GitHub. That's it. It will take just a bit to render. -
    logpointplot(mtcars[,c("mpg","vs")])
+There is more to it than that, but that is how you can get started. If you want to add comments, [Disqus][] is a great option. Once you fork someone's jekyll site, make sure to change all the personal/site specific information to your information, including the RSS feed. -

    mtcars point plot

    +*** Note: You can name your repo for your site/blog as yourgithubname.github.com if you want your URL for the site to be http://yourgithubname.github.com. Or you can name your repo whatever you want, e.g., disrepo, then the URL will be http://yourgithubname.github.com/disrepo. -
    logpointplot(movies[,c("rating","Action")])
    -

    movies point plot

    +[Jekyll]: https://github.com/mojombo/jekyll +[GitHub]: https://github.com/ +[here]: https://github.com/mojombo/jekyll/wiki/sites +[git]: http://git-scm.com/ +[getgithub]: https://github.com/signup/free +[here2]: https://github.com/mojombo/jekyll/wiki/sites +[here3]: http://mac.github.com/ +[configgithug]: http://help.github.com/mac-set-up-git/ +[Disqus]: http://disqus.com/

    - - Testing twitterfeed + + Presenting results of logistic regression

    - + -

    Does this work on twitterfeed?

+ So my advisor pointed out this 'new' (well, 2004) way of plotting the results of logistic regression. The idea was presented in a 2004 Bulletin of the Ecological Society of America issue ([here][]). I tried to come up with a solution using, what else, ggplot2. I don't have it quite all the way down - I am missing the second y-axis values for the histograms, but someone smarter than me can figure that part out (note that Hadley doesn't want to support second y-axes in ggplot2, but they can probably be hacked on). -
    - -
    -

    - - Weecology can has new mammal dataset - -

    +Here's the code: + - -

    So the Weecology folks have published a large dataset on mammal communities in a data paper in Ecology. I know nothing about mammal communities, but that doesn't mean one can't play with the data...

    +Here's a few examples using datasets provided with the ggplot2 package: -

    Their dataset consists of five csv files:

    +{% highlight r %} +loghistplot(mtcars[,c("mpg","vs")]) +{% endhighlight %} -
      -
• communities,
• references,
• sites,
• species, and
• trapping data
    +![mtcars plot](/public/img/mtcarsplot.png) -

    Where are these sites, and by the way, do they vary much in altitude?
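(The map gist doesn't render here; below is a hedged sketch of how one might map the sites with ggplot2 and the maps package. The file name and the longitude/latitude/elevation column names are guesses for illustration, not taken from the data paper.)

{% highlight r %}
# Hedged sketch: plot site locations over a US state map
library(ggplot2)
library(maps)

sites <- read.csv("mammal_community_sites.csv")  # hypothetical file name
usa <- map_data("state")

ggplot() +
  geom_polygon(data = usa, aes(long, lat, group = group),
               fill = "grey90", colour = "grey50") +
  geom_point(data = sites, aes(longitude, latitude, colour = elevation)) +
  coord_quickmap()
{% endhighlight %}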

    -

    +{% highlight r %} +loghistplot(movies[,c("rating","Action")]) +{% endhighlight %} -

    +![movies plot](/public/img/moviesplot.png) -

    Let's zoom in on just the states

    -

    +And two examples of the logpointplot function: -

    What phylogenies can we get for the species in this dataset?

    +{% highlight r %} +logpointplot(mtcars[,c("mpg","vs")]) +{% endhighlight %} -

    We can use the rOpenSci package treebase to search the online phylogeny repository TreeBASE. Limiting to returning a max of 1 tree (to save time), we can see that X species are in at least 1 tree on the TreeBASE database. Nice.

    +![mtcars point plot](/public/img/logpointplot1.png) -

    So there are 321 species in the database with at least 1 tree in the TreeBASE database. Of course there could be many more, but we limited results from TreeBASE to just 1 tree per query.

    -

    Here's the code:

    +{% highlight r %} +logpointplot(movies[,c("rating","Action")]) +{% endhighlight %} + +![movies point plot](/public/img/logpointplot2.png) + + +[here]: http://esapubs.org/bulletin/backissues/085-3/bulletinjuly2004_2column.htm#tools1 + +
    + +
    +

    + + Testing twitterfeed + +

    + + - + Does this work on twitterfeed?
    diff --git a/_site/page35/index.html b/_site/page35/index.html index ab87c909ff..2780b23f9b 100644 --- a/_site/page35/index.html +++ b/_site/page35/index.html @@ -61,89 +61,111 @@

    Recology

    - - Recology is 1 yr old + + Weecology can has new mammal dataset

    - - -

    This blog has lasted a whole year already.  Thanks for reading and commenting.

    + -

    There are a couple of announcements:

    + So the [Weecology][] folks have published a large dataset on mammal communities in a data paper in [Ecology][]. I know nothing about mammal communities, but that doesn't mean one can't play with the data... -
      -
1. Less blogging: I hope to put in many more years blogging here, but in full disclosure I am blogging for Journal of Ecology now, so I am going to be (and already have been) blogging less here.
2. More blogging: If anyone wants to write guest posts at Recology on the topics of using R for ecology and evolution, or open science, please contact me.
3. Different blogging: I was going to roll out the new dynamic views for this blog, but Google doesn't allow javascript, which is how I include code using GitHub gists. Oh well...
    +### Their dataset consists of five csv files: ++ communities, ++ references, ++ sites, ++ species, and ++ trapping data -

    Anywho, here is the breakdown of visits to this blog, visualized using #ggplot2, of course. There were a total of about 23,000 pageviews in the first year of this blog.

    +### Where are these sites, and by the way, do they vary much in altitude? -

    Here is the pie chart code I used: -
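(The gist doesn't render in this archived page; for the general idea, here is a small sketch of a ggplot2 pie chart with made-up numbers, not the real pageview data.)

{% highlight r %}
# Hedged sketch: ggplot2 pie chart via a stacked bar plus polar coordinates
library(ggplot2)

views <- data.frame(
  source = c("Google", "R-bloggers", "Twitter", "Direct", "Other"),
  visits = c(9000, 6000, 4000, 3000, 1000)   # made-up numbers
)

ggplot(views, aes(x = "", y = visits, fill = source)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  labs(x = NULL, y = NULL)
{% endhighlight %}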

    + -

    Visits to top ten posts:

    +
    -

    +### Let's zoom in on just the states -

Visits by pages:

    +
    -

    +### What phylogenies can we get for the species in this dataset? -

    Visits by top referring sites:

    +We can use the [rOpenSci package treebase] to search the online phylogeny repository [TreeBASE][]. Limiting to returning a max of 1 tree (to save time), we can see that X species are in at least 1 tree on the TreeBASE database. Nice. -

    +So there are 321 species in the database with at least 1 tree in the TreeBASE database. Of course there could be many more, but we limited results from TreeBASE to just 1 tree per query. -
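(As a hedged sketch of the kind of query involved, assuming the treebase package's search_treebase() function with its max_trees argument; the species vector is a made-up subset for illustration, not the real species list.)

{% highlight r %}
# Hedged sketch: ask TreeBASE for at most one tree per species
library(treebase)

species <- c("Peromyscus maniculatus", "Dipodomys merriami")  # hypothetical subset
trees <- lapply(species, function(sp) {
  tryCatch(search_treebase(sp, by = "taxon", max_trees = 1),
           error = function(e) NULL)
})

# how many species returned at least one tree
sum(vapply(trees, function(x) length(x) > 0, logical(1)))
{% endhighlight %}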

    Visits by country:

    +### Here's the code: -

    + -

    Visits by browsers:

    -

    -

    Visits by operating system:

    - -

    +[Weecology]: http://weecology.org/ +[Ecology]: http://www.esajournals.org/doi/abs/10.1890/11-0262.1 +[rOpenSci package treebase]: http://cran.r-project.org/web/packages/treebase/ +[TreeBASE]: http://www.treebase.org/treebase-web/home.html

    - - Dynamic views don't support javascript-so reverting back to simple views + + Recology is 1 yr old

    - + -

    Sorry for the temporary loss of GitHub gists...Hopefully dynamic views will support javascript soon!!

    + This blog has lasted a whole year already.  Thanks for reading and commenting. -
    - -
    -

    - - I Work For The Internet ! - -

    +### There are a couple of announcements: + +1. Less blogging: I hope to put in many more years blogging here, but in full disclosure [I am blogging for Journal of Ecology][jeco] now, so I am going to be (and already have been) blogging less here. +2. More blogging: If anyone wants to write guest posts at Recology on the topics of using R for ecology and evolution, or open science, please contact me. +3. Different blogging: I was going to roll out the new dynamic views for this blog, but Google doesn't allow javascript, which is how I include code using GitHub gists. Oh well... + +Anywho, here is the breakdown of visits to this blog, visualized using #ggplot2, of course. There were a total of about 23,000 pageviews in the first year of this blog. + +[jeco]: http://jecologyblog.wordpress.com/ - +Here is the pie chart code I used: + -

UPDATE: code and figure updated at 647 AM CST on 19 Dec '11. Also, see Jarrett Byrnes' (improved) fork of my gist here.

    +Visits to top ten posts: -

    The site I WORK FOR THE INTERNET is collecting pictures and first names (last name initials only) to show collective support against SOPA (the Stop Online Piracy Act). Please stop by their site and add your name/picture.

    + -

I used the #rstats package twitteR, created by Jeff Gentry, to search for tweets from people signing this site with their picture, then plotted using ggplot2, and also used Hadley's lubridate to round timestamps on tweets to be able to bin tweets into time slots for plotting.

+Visits by pages: -

    Tweets containing the phrase 'I work for the internet' by time:

    + -

    +Visits by top referring sites: -

Here's the code as a GitHub gist. Sometimes the searchTwitter fxn returns an error, which I don't understand, but you can play with it:
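(Since the gist doesn't render here, this is a rough sketch of the approach, assuming the twitteR and lubridate packages; searchTwitter() requires Twitter API authentication, and the original gist's arguments may differ.)

{% highlight r %}
# Hedged sketch: search tweets, bin by hour, plot counts over time
library(twitteR)
library(lubridate)
library(ggplot2)

tweets <- searchTwitter("I work for the internet", n = 500)
df <- twListToDF(tweets)
df$slot <- round_date(df$created, unit = "hour")  # bin timestamps into hourly slots

ggplot(df, aes(x = slot)) +
  geom_bar() +
  labs(x = "Time", y = "Number of tweets")
{% endhighlight %}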

    + + +Visits by country: + + + +Visits by browsers: + + + +Visits by operating system: + + + +
    + +
    +

    + + Dynamic views don't support javascript-so reverting back to simple views + +

    + + - + Sorry for the temporary loss of GitHub gists...Hopefully dynamic views will support javascript soon!!
    diff --git a/_site/page36/index.html b/_site/page36/index.html index 3a12f19116..9db54f96a2 100644 --- a/_site/page36/index.html +++ b/_site/page36/index.html @@ -61,40 +61,55 @@

    Recology

    - - LondonR meetings presentations + + I Work For The Internet !

- + + + UPDATE: code and figure updated at 647 AM CST on 19 Dec '11. Also, see Jarrett Byrnes' (improved) fork of my gist [here][]. + +The site [I WORK FOR THE INTERNET][iwfti] is collecting pictures and first names (last name initials only) to show collective support against SOPA (the Stop Online Piracy Act). Please stop by their site and add your name/picture. + +I used the #rstats package twitteR, created by Jeff Gentry, to search for tweets from people signing this site with their picture, then plotted using ggplot2, and also used Hadley's lubridate to round timestamps on tweets to be able to bin tweets into time slots for plotting. + +Tweets containing the phrase 'I work for the internet' by time: + + + +Here's the code as a GitHub gist. Sometimes the searchTwitter fxn returns an error, which I don't understand, but you can play with it: + + -

Three presentations were uploaded to the LondonR meetings website.  I especially enjoyed the JD Long presentation on the segue package for simulations using Amazon's EC2.

    +[here]: https://gist.github.com/1474802 +[iwfti]: http://iworkfortheinternet.org/

    - - rOpenSci won 3rd place in the PLoS-Mendeley Binary Battle! + + LondonR meetings presentations

    - + -


    I am part of the rOpenSci development team (along with Carl Boettiger, Karthik Ram, and Nick Fabina).   Our website: http://ropensci.org/.  Code at Github: https://github.com/ropensci

We entered two of our R packages for integrating with PLoS Journals (rplos) and Mendeley (RMendeley) in the Mendeley-PLoS Binary Battle.  Get them at GitHub (rplos, RMendeley).

These two packages allow users (from R! of course) to search and retrieve data from PLoS journals (including their altmetrics data), and from Mendeley.  You could surely mash up data from both PLoS and Mendeley.  That's what's cool about rOpenSci - we provide the tools, and leave it up to users' vast creativity to do awesome things.

    3rd place gives us a $1,000 prize, plus a Parrot AR Drone helicopter.

+ Three presentations were uploaded to the LondonR meetings website.  I especially enjoyed the JD Long presentation on the segue package for simulations using Amazon's EC2.

    - - Public vote open for Mendely-PLoS Binary Battle: vote rOpenSci! + + rOpenSci won 3rd place in the PLoS-Mendeley Binary Battle!

    - + -

    http://www.surveygizmo.com/s3/722753/Mendeley-PLoS-Binary-Battle-Public-Vote

    +
    I am part of the rOpenSci development team (along with Carl Boettiger, Karthik Ram, and Nick Fabina).   Our website: http://ropensci.org/.  Code at Github: https://github.com/ropensci

We entered two of our R packages for integrating with PLoS Journals (rplos) and Mendeley (RMendeley) in the Mendeley-PLoS Binary Battle.  Get them at GitHub (rplos, RMendeley).

These two packages allow users (from R! of course) to search and retrieve data from PLoS journals (including their altmetrics data), and from Mendeley.  You could surely mash up data from both PLoS and Mendeley.  That's what's cool about rOpenSci - we provide the tools, and leave it up to users' vast creativity to do awesome things.

    3rd place gives us a $1,000 prize, plus a Parrot AR Drone helicopter.
    diff --git a/_site/page37/index.html b/_site/page37/index.html index 09c59210f0..85953ab684 100644 --- a/_site/page37/index.html +++ b/_site/page37/index.html @@ -61,40 +61,40 @@

    Recology

    - - My talk on doing phylogenetics in R + + Public vote open for Mendely-PLoS Binary Battle: vote rOpenSci!

    - + -

    I gave a talk today on doing very basic phylogenetics in R, including getting sequence data, aligning sequence data, plotting trees, doing trait evolution stuff, etc.
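(The talk code isn't embedded here; as a tiny taste of the sort of thing covered, a minimal sketch using ape's built-in woodmouse alignment, not the talk's actual code.)

{% highlight r %}
# Minimal sketch: distance matrix plus neighbour-joining tree with ape
library(ape)

data(woodmouse)            # example DNA alignment shipped with ape
d <- dist.dna(woodmouse)   # pairwise distances
tr <- nj(d)                # neighbour-joining tree
plot(tr)
{% endhighlight %}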

Please comment if you have code for doing Bayesian phylogenetic inference in R.  I know phyloch has a function mrbayes, but I can't get it to work...


    + http://www.surveygizmo.com/s3/722753/Mendeley-PLoS-Binary-Battle-Public-Vote

    - - Check out a video of my research at RocketHub + + My talk on doing phylogenetics in R

    - + -

    Okay, so this post isn't at all about R - but I can't resist begging my readers for some help.

    I’m trying to get some crowdfunding for my research on the evolution of native plants in agricultural landscapes. My campaign is part of a larger project by about 50 other scientists and me to see how well it works to go straight to the public to get funding for science research. All these projects, including mine, are hosted at a site called RocketHub - a site that hosts crowdfunding projects of all sorts – and now they have science.

It is important to get a few bucks at the beginning so that the people with deep pockets who don’t know me will hopefully chip in once they see the money ball rolling.

    The funding will go towards paying some students to collect data in the lab for me.

    Here’s the link if you want to donate, or just to check out the video I made about my research!
    http://www.rockethub.com/projects/3790-evolution-in-agriculture



    And watch the video here too:

    + I gave a talk today on doing very basic phylogenetics in R, including getting sequence data, aligning sequence data, plotting trees, doing trait evolution stuff, etc.

Please comment if you have code for doing Bayesian phylogenetic inference in R.  I know phyloch has a function mrbayes, but I can't get it to work...


    - - My little presentation on getting web data through R + + Check out a video of my research at RocketHub

    - + -
    With examples from rOpenSci R packages. p.s. I am no expert at this...
    Web data from R
    View more presentations from schamber
    + Okay, so this post isn't at all about R - but I can't resist begging my readers for some help.

    I’m trying to get some crowdfunding for my research on the evolution of native plants in agricultural landscapes. My campaign is part of a larger project by about 50 other scientists and me to see how well it works to go straight to the public to get funding for science research. All these projects, including mine, are hosted at a site called RocketHub - a site that hosts crowdfunding projects of all sorts – and now they have science.

It is important to get a few bucks at the beginning so that the people with deep pockets who don’t know me will hopefully chip in once they see the money ball rolling.

    The funding will go towards paying some students to collect data in the lab for me.

    Here’s the link if you want to donate, or just to check out the video I made about my research!
    http://www.rockethub.com/projects/3790-evolution-in-agriculture



    And watch the video here too:

    diff --git a/_site/page38/index.html b/_site/page38/index.html index 357e2c7bd9..d87c727d1b 100644 --- a/_site/page38/index.html +++ b/_site/page38/index.html @@ -61,40 +61,40 @@

    Recology

    - - Two new rOpenSci R packages are on CRAN + + My little presentation on getting web data through R

    - + -

Carl Boettiger, a graduate student at UC Davis, just got two packages on CRAN.  One is treebase, which handshakes with the Treebase API.  The other is rfishbase, which connects with Fishbase, although I believe it just scrapes XML content as there is no API.  See development on GitHub for treebase here, and for rfishbase here.  Carl has some tutorials on treebase and rfishbase at his website here, and we have an official rOpenSci tutorial for treebase here.

    Basically, these two R packages let you search and pull down data from Treebase and Fishbase - pretty awesome.  This improves workflow, and puts your data search and acquisition component into your code, instead of being a bunch of mouse clicks in a browser.

    These two packages are part of the rOpenSci project.

    +
    With examples from rOpenSci R packages. p.s. I am no expert at this...
    Web data from R
    View more presentations from schamber

    - - Two-sex demographic models in R + + Two new rOpenSci R packages are on CRAN

    - + -

    Tom Miller (a prof here at Rice) and Brian Inouye have a paper out in Ecology (paper, appendices) that confronts two-sex models of dispersal with empirical data.

    They conducted the first confrontation of two-sex demographic models with empirical data on lab populations of bean beetles Callosobruchus.

    Their R code for the modeling work is available at Ecological Archives (link here).


Here is a figure made from running the five blocks of code in 'Miller_and_Inouye_figures.txt' that reproduces Fig. 4 (A-E) in their Ecology paper (p = proportion female, Nt = density).  Nice!
    A: Saturating density dependence
    B: Over-compensatory density dependence
    C: Sex-specific gamma's (but bM=bF=0.5)
    D: Sex-specific b's (but gammaM=gammaF=1)
    E: Sex-specific b's (but gammaM=gammaF=2)


+ Carl Boettiger, a graduate student at UC Davis, just got two packages on CRAN.  One is treebase, which handshakes with the Treebase API.  The other is rfishbase, which connects with Fishbase, although I believe it just scrapes XML content as there is no API.  See development on GitHub for treebase here, and for rfishbase here.  Carl has some tutorials on treebase and rfishbase at his website here, and we have an official rOpenSci tutorial for treebase here.

    Basically, these two R packages let you search and pull down data from Treebase and Fishbase - pretty awesome.  This improves workflow, and puts your data search and acquisition component into your code, instead of being a bunch of mouse clicks in a browser.

    These two packages are part of the rOpenSci project.

    - - New food web dataset + + Two-sex demographic models in R

    - + -



    So, there is a new food web dataset out that was put in Ecological Archives here, and I thought I would play with it. The food web is from Otago Harbour, an intertidal mudflat ecosystem in New Zealand. The web contains 180 nodes, with 1,924 links.

    Fun stuff...

    igraph, default layout plot

    igraph, circle layout plot, nice

    My funky little gggraph function plot
    get the gggraph function, and make it better, here at Github




    + Tom Miller (a prof here at Rice) and Brian Inouye have a paper out in Ecology (paper, appendices) that confronts two-sex models of dispersal with empirical data.

    They conducted the first confrontation of two-sex demographic models with empirical data on lab populations of bean beetles Callosobruchus.

    Their R code for the modeling work is available at Ecological Archives (link here).


    Here is a figure made from running the five blocks of code in 'Miller_and_Inouye_figures.txt' that reproduces Fig. 4 (A-E) in their Ecology paper (p = proportion female, Nt = density).  Nice!
    A: Saturating density dependence
    B: Over-compensatory density dependence
    C: Sex-specific gamma's (but bM=bF=0.5)
    D: Sex-specific b's (but gammaM=gammaF=1)
    E: Sex-specific b's (but gammaM=gammaF=2)


    diff --git a/_site/page39/index.html b/_site/page39/index.html index 13f4286508..6b858841a3 100644 --- a/_site/page39/index.html +++ b/_site/page39/index.html @@ -59,6 +59,19 @@

    Recology

      +
    +

    + + New food web dataset + +

    + + + +

    So, there is a new food web dataset out that was put in Ecological Archives here, and I thought I would play with it. The food web is from Otago Harbour, an intertidal mudflat ecosystem in New Zealand. The web contains 180 nodes, with 1,924 links.

    Fun stuff...
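(For a rough idea of how the igraph plots below are made, here is a hedged sketch with a tiny made-up edge list, not the Otago Harbour data or the original code.)

{% highlight r %}
# Hedged sketch: plot a directed food web from an edge list with igraph
library(igraph)

edges <- data.frame(from = c("crab", "crab", "snail", "fish"),
                    to   = c("snail", "algae", "algae", "crab"))  # made-up links
g <- graph_from_data_frame(edges, directed = TRUE)

plot(g)                                  # default layout
plot(g, layout = layout_in_circle(g))    # circle layout
{% endhighlight %}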

    igraph, default layout plot

    igraph, circle layout plot, nice

    My funky little gggraph function plot
    get the gggraph function, and make it better, here at Github




    + +
    +

    @@ -68,7 +81,7 @@

    -

    So, I've blogged about this topic before, way back on 5 Jan this year.

    Matt Helmus, a postdoc in the Wootton lab at the University of Chicago, published a paper with Anthony Ives in Ecological Monographs this year (abstract here).  The paper addressed a new statistical approach to phylogenetic community structure.

    As I said in the original post, part of the power of the PGLMM (phylogenetic generalized linear mixed models) approach is that you don't have to conduct quite so many separate statistical tests as with the previous null model/randomization approach.

    Their original code was written in Matlab.  Here I provide the R code that Matt has so graciously shared with me.  There are four functions and a fifth file has an example use case.  The example and output are shown below.

    Look for the inclusion of Matt's PGLMM to the picante R package in the future.

    Here are links to the files as GitHub gists: 
    PGLMM.data.R:  https://gist.github.com/1278205
    PGLMM.fit.R:  https://gist.github.com/1284284
    PGLMM.reml.R:  https://gist.github.com/1284287
    PGLMM.sim.R:  https://gist.github.com/1284288
    PGLMMexample.R:  https://gist.github.com/1284442

    Enjoy!


    The example
<script src="https://gist.github.com/1284477.js?file=PGLMMexampleoutput.R">


    ..and the figures...




    + So, I've blogged about this topic before, way back on 5 Jan this year.

    Matt Helmus, a postdoc in the Wootton lab at the University of Chicago, published a paper with Anthony Ives in Ecological Monographs this year (abstract here).  The paper addressed a new statistical approach to phylogenetic community structure.

    As I said in the original post, part of the power of the PGLMM (phylogenetic generalized linear mixed models) approach is that you don't have to conduct quite so many separate statistical tests as with the previous null model/randomization approach.

    Their original code was written in Matlab.  Here I provide the R code that Matt has so graciously shared with me.  There are four functions and a fifth file has an example use case.  The example and output are shown below.

    Look for the inclusion of Matt's PGLMM to the picante R package in the future.

    Here are links to the files as GitHub gists: 
    PGLMM.data.R:  https://gist.github.com/1278205
    PGLMM.fit.R:  https://gist.github.com/1284284
    PGLMM.reml.R:  https://gist.github.com/1284287
    PGLMM.sim.R:  https://gist.github.com/1284288
    PGLMM_example.R:  https://gist.github.com/1284442

    Enjoy!


    The example



    ..and the figures...



    @@ -81,41 +94,35 @@

    -

Regular expressions are a powerful tool in any language for manipulating, searching, and otherwise working with data.

+ Regular expressions are a powerful tool in any language for manipulating, searching, and otherwise working with data. -

    For example:

    +For example: -
    > fruit <- c("apple", "banana", "pear", "pineapple")
    -> fruit
    -[1] "apple"     "banana"    "pear"      "pineapple"
    +{% highlight r %}
    +> fruit <- c("apple", "banana", "pear", "pineapple")
    +> fruit
    +[1] "apple"     "banana"    "pear"      "pineapple"
     
    -> grep("a", fruit) # there is an "a" in each of the words
    -[1] 1 2 3 4
    -> 
    -> strsplit("a string", "s") # strsplit splits the string on the "s"
    -[[1]]
    -[1] "a "    "tring"
    +> grep("a", fruit) # there is an "a" in each of the words +[1] 1 2 3 4 +> +> strsplit("a string", "s") # strsplit splits the string on the "s" +[[1]] +[1] "a " "tring" +{% endhighlight %} -

    R base has many functions for regular expressions, see slide 9 of Ed's talk below. The package stringr, created by Hadley Wickham, is a nice alternative that wraps the base regex functions for easier use. I highly recommend stringr.
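A few quick stringr equivalents of the base calls above (a small illustration, not from Ed's talk):

{% highlight r %}
# stringr wrappers around the base regex functions
library(stringr)

fruit <- c("apple", "banana", "pear", "pineapple")
str_detect(fruit, "a")            # TRUE for each word containing "a"
str_which(fruit, "^p")            # indices of words starting with "p"
str_split("a string", "s")        # like strsplit()
str_replace_all(fruit, "a", "4")  # replace every "a"
{% endhighlight %}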

    -

    Ed Goodwin, the coordinator of the Houston R Users group, gave a presentation to the group last night on regular expressions in R. It was a great talk, and he is allowing me to post his talk here.

    +R base has many functions for regular expressions, see slide 9 of Ed's talk below. The package stringr, created by Hadley Wickham, is a nice alternative that wraps the base regex functions for easier use. I highly recommend [stringr][]. -

    Enjoy! And thanks for sharing Ed!

    - +Ed Goodwin, the coordinator of the [Houston R Users group][doi], gave a presentation to the group last night on regular expressions in R. It was a great talk, and he is allowing me to post his talk here. -

    - -
    -

    - - R tutorial on visualizations/graphics - -

    +Enjoy! And thanks for sharing Ed! - + -

    Rolf Lohaus, a Huxley postdoctoral fellow here in the EEB dept at Rice University, gave our R course a talk on basic visualizations in R this morning.

    Enjoy!


    +[stringr]: http://cran.r-project.org/web/packages/stringr/index.html +[doi]: http://www.meetup.com/houstonr/
    diff --git a/_site/page4/index.html b/_site/page4/index.html index a2071a743b..cfab7227fe 100644 --- a/_site/page4/index.html +++ b/_site/page4/index.html @@ -61,659 +61,736 @@

    Recology

    - - iDigBio - a new data source in spocc + + rerddap - General purpose R client for ERDDAP servers

    - + + + [ERDDAP](http://upwell.pfeg.noaa.gov/erddap/information.html) is a data server that gives you a simple, consistent way to download subsets of gridded and tabular scientific datasets in common file formats and make graphs and maps. Besides it’s own [RESTful interface](http://upwell.pfeg.noaa.gov/erddap/rest.html), much of which is designed based on [OPeNDAP](https://en.wikipedia.org/wiki/OPeNDAP), ERDDAP can act as an OPeNDAP server and as a [WMS](https://en.wikipedia.org/wiki/Web_Map_Service) server for gridded data. + +ERDDAP is a powerful tool - in a world of heterogeneous data, it's often hard to combine data and serve it through the same interface, with tools for querying/filtering/subsetting the data. That is exactly what ERDDAP does. Heterogeneous data sets often have some similarities, such as latitude/longitude data and usually a time component, but other variables vary widely. + +## NetCDF + +`rerddap` supports [NetCDF format](https://en.wikipedia.org/wiki/NetCDF), and is the default when using the `griddap()` function. We use `ncdf` by default, but you can choose to use `ncdf4` instead. + +## Caching + +Data files downloaded are cached in a single hidden directory `~/.rerddap` on your machine. It's hidden so that you don't accidentally delete the data, but you can still easily delete the data if you like, open files, move them around, etc. + +When you use `griddap()` or `tabledap()` functions, we construct a [MD5 hash](https://en.wikipedia.org/wiki/MD5#MD5_hashes) from the base URL, and any query parameters - this way each query is separately cached. Once we have the hash, we look in `~/.rerddap` for a matching hash. If there's a match we use that file on disk - if no match, we make a http request for the data to the ERDDAP server you specify. + +## ERDDAP servers + +You can get a data.frame of ERDDAP servers using the function `servers()`. Most I think serve some kind of NOAA data, but there are a few that aren't NOAA data. Here are a few: + + + + + +```r +head(servers()) +#> name +#> 1 Marine Domain Awareness (MDA) - Italy +#> 2 Marine Institute - Ireland +#> 3 CoastWatch Caribbean/Gulf of Mexico Node +#> 4 CoastWatch West Coast Node +#> 5 NOAA IOOS CeNCOOS (Central and Northern California Ocean Observing System) +#> 6 NOAA IOOS NERACOOS (Northeastern Regional Association of Coastal and Ocean Observing Systems) +#> url +#> 1 https://bluehub.jrc.ec.europa.eu/erddap/ +#> 2 http://erddap.marine.ie/erddap/ +#> 3 http://cwcgom.aoml.noaa.gov/erddap/ +#> 4 http://coastwatch.pfeg.noaa.gov/erddap/ +#> 5 http://erddap.axiomalaska.com/erddap/ +#> 6 http://www.neracoos.org/erddap/ +``` + + +## Install -

    iDigBio, or Integrated Digitized Biocollections, collects and provides access to species occurrence data, and associated metadata (e.g., images of specimens, when provided). They collect data from a lot of different providers. They have a nice web interface for searching, check out idigbio.org/portal/search.

    - -

spocc is a package we've been working on at rOpenSci for a while now - it is a one-stop shop for retrieving species occurrence data. As new sources of species occurrence data come to our attention, and are available via a RESTful API, we incorporate them into spocc.

    - -

Last week I attended a hackathon put on by iDigBio. One of the projects I worked on was integrating iDigBio into spocc.

    - -

    With the addition of iDigBio, we now have in spocc:

    - - - -

    The following is a quick demo of getting iDigBio data in spocc

    - -

    Install

    - -

    Get updated versions of rgbif and ridigbio first. And get leaflet to make an interactive map.

    -
    devtools::install_github("ropensci/rgbif", "iDigBio/ridigbio", "rstudio/leaflet")
    -devtools::install_github("ropensci/spocc")
    -
    library("spocc")
    -
    -

    Use ridigbio - the R client for iDigBio

    -
    library("ridigbio")
    -idig_search_records(rq = list(genus = "acer"), limit = 5)
    -#>                                   uuid
    -#> 1 00041678-5df1-4a23-ba78-8c12f60af369
    -#> 2 00072caf-0f24-447f-b68e-a20299f6afc7
    -#> 3 000a6b9b-0bbd-46f6-82cb-848c30c46313
    -#> 4 001d05e0-9c86-466d-957d-e73e2ce64fbe
    -#> 5 0022a2da-bc97-4bef-b2a5-b8a9944fc677
    -#>                                    occurrenceid catalognumber      family
    -#> 1 urn:uuid:b275f928-5c0d-4832-ae82-fde363d8fde1          <NA> sapindaceae
    -#> 2          40428b90-27a5-11e3-8d47-005056be0003   lsu00049997   aceraceae
    -#> 3          02ca5aae-d8ab-492f-af10-e005b96c2295        191243 sapindaceae
    -#> 4                     urn:catalog:cas:ds:679715      ds679715 sapindaceae
    -#> 5          b12bd651-2c6b-11e3-b3b8-180373cac83e         41898 sapindaceae
    -#>   genus  scientificname       country stateprovince geopoint.lat
    -#> 1  acer     acer rubrum united states      illinois         <NA>
    -#> 2  acer    acer negundo united states     louisiana         <NA>
    -#> 3  acer            <NA> united states      new york         <NA>
    -#> 4  acer acer circinatum united states    california      41.8714
    -#> 5  acer     acer rubrum united states      maryland   39.4197222
    -#>   geopoint.lon             datecollected           collector
    -#> 1         <NA> 1967-06-25T00:00:00+00:00     john e. ebinger
    -#> 2         <NA> 1991-04-19T00:00:00+00:00     alan w. lievens
    -#> 3         <NA>                      <NA> stephen f. hilfiker
    -#> 4    -123.8503 1930-10-27T00:00:00+00:00        carl b. wolf
    -#> 5  -77.1227778 1980-04-29T00:00:00+00:00         doweary, d.
    -
    -

    Use spocc

    - -

    Scientific name search

    - -

    Same search as above with ridigbio

    -
    occ(query = "Acer", from = "idigbio", limit = 5)
    -#> Searched: idigbio
    -#> Occurrences - Found: 379, Returned: 5
    -#> Search type: Scientific
    -#>   idigbio: Acer (5)
    -
    -

    Geographic search

    - -

    iDigBio uses Elasticsearch syntax to define a geographic search, but all you need to do is give a numeric vector of length 4 defining a bounding box, and you're good to go.

    -
    bounds <- c(-120, 40, -100, 45)
    -occ(from = "idigbio", geometry = bounds, limit = 10)
    -#> Searched: idigbio
    -#> Occurrences - Found: 346,737, Returned: 10
    -#> Search type: Geometry
    -
    -

    W/ or W/O Coordinates

    - -

    Don't pass has_coords (gives data w/ and w/o coordinates data)

    -
    occ(query = "Acer", from = "idigbio", limit = 5)
    -#> Searched: idigbio
    -#> Occurrences - Found: 379, Returned: 5
    -#> Search type: Scientific
    -#>   idigbio: Acer (5)
    -
    -

    Only records with coordinates data

    -
    occ(query = "Acer", from = "idigbio", limit = 5, has_coords = TRUE)
    -#> Searched: idigbio
    -#> Occurrences - Found: 16, Returned: 5
    -#> Search type: Scientific
    -#>   idigbio: Acer (5)
    -
    -

    Only records without coordinates data

    -
    occ(query = "Acer", from = "idigbio", limit = 5, has_coords = FALSE)
    -#> Searched: idigbio
    -#> Occurrences - Found: 363, Returned: 5
    -#> Search type: Scientific
    -#>   idigbio: Acer (5)
    -
    -

    Make an interactive map

    -
    library("leaflet")
    -bounds <- c(-120, 40, -100, 45)
    -leaflet(data = dat) %>% 
    -  addTiles() %>%
    -  addMarkers(~longitude, ~latitude, popup = ~name) %>% 
    -  addRectangles(
    -    lng1 = bounds[1], lat1 = bounds[4],
    -    lng2 = bounds[3], lat2 = bounds[2],
    -    fillColor = "transparent"
    -  )
    -
    -

    image

    +From CRAN + + +```r +install.packages("rerddap") +``` + +Or development version from GitHub + + +```r +devtools::install_github("ropensci/rerddap") +``` + + +```r +library('rerddap') +``` + +## Search + +First, you likely want to search for data, specifying whether to search for either `griddadp` or `tabledap` datasets. The default is `griddap`. + + +```r +ed_search(query = 'size', which = "table") +#> 11 results, showing first 20 +#> title +#> 1 CalCOFI Fish Sizes +#> 2 CalCOFI Larvae Sizes +#> 3 Channel Islands, Kelp Forest Monitoring, Size and Frequency, Natural Habitat +#> 4 CalCOFI Larvae Counts Positive Tows +#> 5 CalCOFI Tows +#> 7 OBIS - ARGOS Satellite Tracking of Animals +#> 8 GLOBEC NEP MOCNESS Plankton (MOC1) Data +#> 9 GLOBEC NEP Vertical Plankton Tow (VPT) Data +#> 10 NWFSC Observer Fixed Gear Data, off West Coast of US, 2002-2006 +#> 11 NWFSC Observer Trawl Data, off West Coast of US, 2002-2006 +#> 12 AN EXPERIMENTAL DATASET: Underway Sea Surface Temperature and Salinity Aboard the Oleander +#> dataset_id +#> 1 erdCalCOFIfshsiz +#> 2 erdCalCOFIlrvsiz +#> 3 erdCinpKfmSFNH +#> 4 erdCalCOFIlrvcntpos +#> 5 erdCalCOFItows +#> 7 aadcArgos +#> 8 erdGlobecMoc1 +#> 9 erdGlobecVpt +#> 10 nwioosObsFixed2002 +#> 11 nwioosObsTrawl2002 +#> 12 nodcPJJU +``` + + +```r +ed_search(query = 'size', which = "grid") +#> 6 results, showing first 20 +#> title +#> 6 NOAA Global Coral Bleaching Monitoring Products +#> 13 USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][eta_rho][xi_rho] +#> 14 USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][eta_u][xi_u] +#> 15 USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][eta_v][xi_v] +#> 16 USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][s_rho][eta_rho][xi_rho] +#> 17 USGS COAWST Forecast, US East Coast and Gulf of Mexico (Experimental) [time][Nbed][eta_rho][xi_rho] +#> dataset_id +#> 6 NOAA_DHW +#> 13 whoi_ed12_89ce_9592 +#> 14 whoi_61c3_0b5d_cd61 +#> 15 whoi_62d0_9d64_c8ff +#> 16 whoi_7dd7_db97_4bbe +#> 17 whoi_a4fb_2c9c_16a7 +``` + +This gives back dataset titles and identifiers - with which you should be able to get a sense for which dataset you may want to fetch. + +## Information + +After searching you can get more information on a single dataset + + +```r +info('whoi_62d0_9d64_c8ff') +#> whoi_62d0_9d64_c8ff +#> Dimensions (range): +#> time: (2012-06-25T01:00:00Z, 2015-06-24T00:00:00Z) +#> eta_v: (0, 334) +#> xi_v: (0, 895) +#> Variables: +#> bedload_Vsand_01: +#> Units: kilogram meter-1 s-1 +#> bedload_Vsand_02: +#> Units: kilogram meter-1 s-1 +... +``` + +Which is a simple S3 list but prints out pretty, so it's easy to quickly scan the printed output and see what you need to see to proceed. That is, in the next step you want to get the dataset, and you'll want to specify your search using some combination of values for latitude, longitude, and time. + +## griddap (gridded) data + +First, get information on a dataset to see time range, lat/long range, and variables. 
+ + +```r +(out <- info('noaa_esrl_027d_0fb5_5d38')) +#> noaa_esrl_027d_0fb5_5d38 +#> Dimensions (range): +#> time: (1850-01-01T00:00:00Z, 2014-05-01T00:00:00Z) +#> latitude: (87.5, -87.5) +#> longitude: (-177.5, 177.5) +#> Variables: +#> air: +#> Range: -20.9, 19.5 +#> Units: degC +``` + +Then query for gridded data using the `griddap()` function + + +```r +(res <- griddap(out, + time = c('2012-01-01', '2012-01-30'), + latitude = c(21, 10), + longitude = c(-80, -70) +)) +#> noaa_esrl_027d_0fb5_5d38 +#> Path: [~/.rerddap/648ed11e8b911b65e39eb63c8df339df.nc] +#> Last updated: [2015-05-09 08:31:10] +#> File size: [0 mb] +#> Dimensions (dims/vars): [3 X 1] +#> Dim names: time, latitude, longitude +#> Variable names: CRUTEM3: Surface Air Temperature Monthly Anomaly +#> data.frame (rows/columns): [18 X 4] +#> time latitude longitude air +#> 1 2012-01-01T00:00:00Z 22.5 -77.5 NA +#> 2 2012-01-01T00:00:00Z 22.5 -77.5 NA +#> 3 2012-01-01T00:00:00Z 22.5 -77.5 NA +#> 4 2012-01-01T00:00:00Z 22.5 -77.5 -0.1 +#> 5 2012-01-01T00:00:00Z 22.5 -77.5 NA +#> 6 2012-01-01T00:00:00Z 22.5 -77.5 -0.2 +#> 7 2012-01-01T00:00:00Z 17.5 -72.5 0.2 +#> 8 2012-01-01T00:00:00Z 17.5 -72.5 NA +#> 9 2012-01-01T00:00:00Z 17.5 -72.5 0.3 +#> 10 2012-02-01T00:00:00Z 17.5 -72.5 NA +#> .. ... ... ... ... +``` + +The output of `griddap()` is a list that you can explore further. Get the summary + + +```r +res$summary +#> [1] "file ~/.rerddap/648ed11e8b911b65e39eb63c8df339df.nc has 3 dimensions:" +#> [1] "time Size: 2" +#> [1] "latitude Size: 3" +#> [1] "longitude Size: 3" +#> [1] "------------------------" +#> [1] "file ~/.rerddap/648ed11e8b911b65e39eb63c8df339df.nc has 1 variables:" +#> [1] "float air[longitude,latitude,time] Longname:CRUTEM3: Surface Air Temperature Monthly Anomaly Missval:-9.96920996838687e+36" +``` + +Or get the dimension variables (just the names of the variables for brevity here) + + +```r +names(res$summary$dim) +#> [1] "time" "latitude" "longitude" +``` + +Get the data.frame (beware: you may want to just look at the `head` of the data.frame if large) + + +```r +res$data +#> time latitude longitude air +#> 1 2012-01-01T00:00:00Z 22.5 -77.5 NA +#> 2 2012-01-01T00:00:00Z 22.5 -77.5 NA +#> 3 2012-01-01T00:00:00Z 22.5 -77.5 NA +#> 4 2012-01-01T00:00:00Z 22.5 -77.5 -0.10 +#> 5 2012-01-01T00:00:00Z 22.5 -77.5 NA +#> 6 2012-01-01T00:00:00Z 22.5 -77.5 -0.20 +#> 7 2012-01-01T00:00:00Z 17.5 -72.5 0.20 +#> 8 2012-01-01T00:00:00Z 17.5 -72.5 NA +#> 9 2012-01-01T00:00:00Z 17.5 -72.5 0.30 +#> 10 2012-02-01T00:00:00Z 17.5 -72.5 NA +#> 11 2012-02-01T00:00:00Z 17.5 -72.5 NA +#> 12 2012-02-01T00:00:00Z 17.5 -72.5 NA +#> 13 2012-02-01T00:00:00Z 12.5 -67.5 0.40 +#> 14 2012-02-01T00:00:00Z 12.5 -67.5 NA +#> 15 2012-02-01T00:00:00Z 12.5 -67.5 0.20 +#> 16 2012-02-01T00:00:00Z 12.5 -67.5 0.00 +#> 17 2012-02-01T00:00:00Z 12.5 -67.5 NA +#> 18 2012-02-01T00:00:00Z 12.5 -67.5 0.32 +``` + +You can actually still explore the original netcdf summary object, e.g., + + +```r +res$summary$dim$time +#> $name +#> [1] "time" +#> +#> $len +#> [1] 2 +#> +#> $unlim +#> [1] FALSE +#> +#> $id +#> [1] 1 +#> +#> $dimvarid +#> [1] 1 +#> +#> $units +#> [1] "seconds since 1970-01-01T00:00:00Z" +#> +#> $vals +#> [1] 1325376000 1328054400 +#> +#> $create_dimvar +#> [1] TRUE +#> +#> attr(,"class") +#> [1] "dim.ncdf" +``` + +## tabledap (tabular) data + +`tabledap` is data that is not gridded by lat/lon/time. In addition, the query interface is a bit different. 
Notice that you can do less than, more than, equal to type queries, but they are specified as character strings. + + +```r +(out <- info('erdCalCOFIfshsiz')) +#> erdCalCOFIfshsiz +#> Variables: +#> calcofi_species_code: +#> Range: 19, 1550 +#> common_name: +#> cruise: +#> fish_1000m3: +#> Units: Fish per 1,000 cubic meters of water sampled +#> fish_count: +#> fish_size: +... +``` + + +```r +(dat <- tabledap(out, 'time>=2001-07-07', 'time<=2001-07-10', + fields = c('longitude', 'latitude', 'fish_size', 'itis_tsn', 'scientific_name'))) +#> erdCalCOFIfshsiz +#> Path: [~/.rerddap/f013f9ee09bdb4184928d533e575e948.csv] +#> Last updated: [2015-05-09 08:31:21] +#> File size: [0.03 mb] +#> Dimensions: [558 X 5] +#> +#> longitude latitude fish_size itis_tsn scientific_name +#> 2 -118.26 33.255 22.9 623745 Nannobrachium ritteri +#> 3 -118.26 33.255 22.9 623745 Nannobrachium ritteri +#> 4 -118.10667 32.738335 31.5 623625 Lipolagus ochotensis +#> 5 -118.10667 32.738335 48.3 623625 Lipolagus ochotensis +#> 6 -118.10667 32.738335 15.5 162221 Argyropelecus sladeni +#> 7 -118.10667 32.738335 16.3 162221 Argyropelecus sladeni +#> 8 -118.10667 32.738335 17.8 162221 Argyropelecus sladeni +#> 9 -118.10667 32.738335 18.2 162221 Argyropelecus sladeni +#> 10 -118.10667 32.738335 19.2 162221 Argyropelecus sladeni +#> 11 -118.10667 32.738335 20.0 162221 Argyropelecus sladeni +#> .. ... ... ... ... ... +``` + +Since both `griddap()` and `tabledap()` give back data.frame's, it's easy to do downstream manipulation. For example, we can use `dplyr` to filter, summarize, group, and sort: + + +```r +library("dplyr") +dat$fish_size <- as.numeric(dat$fish_size) +df <- tbl_df(dat) %>% + filter(fish_size > 30) %>% + group_by(scientific_name) %>% + summarise(mean_size = mean(fish_size)) %>% + arrange(desc(mean_size)) +df +#> Source: local data frame [20 x 2] +#> +#> scientific_name mean_size +#> 1 Idiacanthus antrostomus 253.00000 +#> 2 Stomias atriventer 189.25000 +#> 3 Lestidiops ringens 98.70000 +#> 4 Tarletonbeania crenularis 56.50000 +#> 5 Ceratoscopelus townsendi 53.70000 +#> 6 Stenobrachius leucopsarus 47.74538 +#> 7 Sardinops sagax 47.00000 +#> 8 Nannobrachium ritteri 43.30250 +#> 9 Bathylagoides wesethi 43.09167 +#> 10 Vinciguerria lucetia 42.00000 +#> 11 Cyclothone acclinidens 40.80000 +#> 12 Lipolagus ochotensis 39.72500 +#> 13 Leuroglossus stilbius 38.35385 +#> 14 Triphoturus mexicanus 38.21342 +#> 15 Diaphus theta 37.88571 +#> 16 Trachipterus altivelis 37.70000 +#> 17 Symbolophorus californiensis 37.66000 +#> 18 Nannobrachium regale 37.50000 +#> 19 Merluccius productus 36.61333 +#> 20 Argyropelecus sladeni 32.43333 +``` + +Then make a cute little plot + + +```r +library("ggplot2") +ggplot(df, aes(reorder(scientific_name, mean_size), mean_size)) + + geom_bar(stat = "identity") + + coord_flip() + + theme_grey(base_size = 20) + + labs(y = "Mean Size", x = "Species") +``` + +![plot of chunk unnamed-chunk-19](/public/img/2015-06-24-rerddap/unnamed-chunk-19-1.png)

    - - openadds - open addresses client + + iDigBio - a new data source in spocc

    - + -

openadds talks to Openaddresses.io. A quick rundown of what it can do:

    - -

    Install

    -
    devtools::install_github("sckott/openadds")
    -
    library("openadds")
    -
    -

    List datasets

    - -

    Scrapes links to datasets from the openaddresses site

    -
    dat <- oa_list()
    -dat[2:6]
    -#> [1] "http://data.openaddresses.io.s3.amazonaws.com/20150511/au-tas-launceston.csv"   
    -#> [2] "http://s3.amazonaws.com/data.openaddresses.io/20141127/au-victoria.zip"         
    -#> [3] "http://data.openaddresses.io.s3.amazonaws.com/20150511/be-flanders.zip"         
    -#> [4] "http://data.openaddresses.io.s3.amazonaws.com/20150417/ca-ab-calgary.zip"       
    -#> [5] "http://data.openaddresses.io.s3.amazonaws.com/20150511/ca-ab-grande_prairie.zip"
    -
    -

    Search for datasets

    - -

    Uses oa_list() internally, then searches through columns requested.

    -
    oa_search(country = "us", state = "ca")
    -#> Source: local data frame [68 x 5]
    -#> 
    -#>    country state             city  ext
    -#> 1       us    ca san_mateo_county .zip
    -#> 2       us    ca   alameda_county .zip
    -#> 3       us    ca   alameda_county .zip
    -#> 4       us    ca           amador .zip
    -#> 5       us    ca           amador .zip
    -#> 6       us    ca      bakersfield .zip
    -#> 7       us    ca      bakersfield .zip
    -#> 8       us    ca         berkeley .zip
    -#> 9       us    ca         berkeley .zip
    -#> 10      us    ca     butte_county .zip
    -#> ..     ...   ...              ...  ...
    -#> Variables not shown: url (chr)
    -
    -

    Get data

    - -

    Passing in a URL

    -
    (out1 <- oa_get(dat[5]))
    -#> <Openaddresses data> ~/.openadds/ca-ab-calgary.zip
    -#> Dimensions [350962, 13]
    -#> 
    -#>    OBJECTID ADDRESS_TY                 ADDRESS    STREET_NAM STREET_TYP
    -#> 0    757023     Parcel  249 SAGE MEADOWS CI NW  SAGE MEADOWS         CI
    -#> 1    757022     Parcel           2506 17 ST SE            17         ST
    -#> 2    757021     Parcel     305 EVANSPARK GD NW     EVANSPARK         GD
    -#> 3    757020     Parcel     321 EVANSPARK GD NW     EVANSPARK         GD
    -#> 4    757019     Parcel   204 EVANSBROOKE LD NW   EVANSBROOKE         LD
    -#> 5    757018     Parcel   200 EVANSBROOKE LD NW   EVANSBROOKE         LD
    -#> 6    757017     Parcel 219 HIDDEN VALLEY LD NW HIDDEN VALLEY         LD
    -#> 7    757016     Parcel 211 HIDDEN VALLEY LD NW HIDDEN VALLEY         LD
    -#> 8    757015     Parcel 364 HIDDEN VALLEY LD NW HIDDEN VALLEY         LD
    -#> 9    757014     Parcel 348 HIDDEN VALLEY LD NW HIDDEN VALLEY         LD
    -#> ..      ...        ...                     ...           ...        ...
    -#> Variables not shown: STREET_QUA (fctr), HOUSE_NUMB (int), HOUSE_ALPH
    -#>      (fctr), SUITE_NUMB (int), SUITE_ALPH (fctr), LONGITUDE (dbl),
    -#>      LATITUDE (dbl), COMM_NAME (fctr)
    -
    -

    First getting URL for dataset through as_openadd(), then passing to oa_get()

    -
    (x <- as_openadd("us", "nm", "hidalgo"))
    -#> <<OpenAddreses>> 
    -#>   <<country>> us
    -#>   <<state>> nm
    -#>   <<city>> hidalgo
    -#>   <<extension>> .csv
    -
    oa_get(x)
    -#> <Openaddresses data> ~/.openadds/us-nm-hidalgo.csv
    -#> Dimensions [170659, 37]
    -#> 
    -#>    OBJECTID Shape ADD_NUM ADD_SUF PRE_MOD PRE_DIR PRE_TYPE         ST_NAME
    -#> 1         1    NA     422                       S                      2ND
    -#> 2         2    NA    1413                       S                      4TH
    -#> 3         3    NA     412                       E                 CHAMPION
    -#> 4         4    NA     110                       E                   SAMANO
    -#> 5         5    NA    2608                       W          FREDDY GONZALEZ
    -#> 6         6    NA    2604                       W          FREDDY GONZALEZ
    -#> 7         7    NA    1123                       W                      FAY
    -#> 8         8    NA     417                       S                      2ND
    -#> 9         9    NA    4551                       E                    TEXAS
    -#> 10       10    NA     810                                        DRIFTWOOD
    -#> ..      ...   ...     ...     ...     ...     ...      ...             ...
    -#> Variables not shown: ST_TYPE (chr), POS_DIR (chr), POS_MOD (chr), ESN
    -#>      (int), MSAG_COMM (chr), PARCEL_ID (chr), PLACE_TYPE (chr), LANDMARK
    -#>      (chr), BUILDING (chr), UNIT (chr), ROOM (chr), FLOOR (int), LOC_NOTES
    -#>      (chr), ST_ALIAS (chr), FULL_ADDR (chr), ZIP (chr), POSTAL_COM (chr),
    -#>      MUNICIPAL (chr), COUNTY (chr), STATE (chr), SOURCE (chr), REGION
    -#>      (chr), EXCH (chr), LAT (dbl), LONG (dbl), PICTURE (chr), OA:x (dbl),
    -#>      OA:y (dbl), OA:geom (chr)
    -
    -

    Combine multiple datasets

    - -

combine attempts to guess lat/long and address columns, but there's definitely more work to do to make this work for most cases. Lat/long and address columns vary among datasets - some datasets have no lat/long data, some have no address data.
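To make that guessing concrete, here's roughly the kind of column matching oa_combine() has to do - illustrative only, not the actual openadds code:

```r
# Illustrative only -- not the actual openadds implementation. Guess which
# columns in a dataset hold longitude, latitude, and an address, given how
# much field names vary across providers.
guess_cols <- function(df) {
  nms <- tolower(names(df))
  pick <- function(pattern) names(df)[grepl(pattern, nms)][1]
  list(
    lon = pick("^(lon|lng|longitude|oa:x)$"),
    lat = pick("^(lat|latitude|oa:y)$"),
    address = pick("address|full_addr")
  )
}
```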

    -
    out2 <- oa_get(dat[32])
    -(alldat <- oa_combine(out1, out2))
    -#> Source: local data frame [418,623 x 4]
    -#> 
    -#>          lon      lat                 address           dataset
    -#> 1  -114.1303 51.17188  249 SAGE MEADOWS CI NW ca-ab-calgary.zip
    -#> 2  -114.0190 51.03168           2506 17 ST SE ca-ab-calgary.zip
    -#> 3  -114.1175 51.17497     305 EVANSPARK GD NW ca-ab-calgary.zip
    -#> 4  -114.1175 51.17461     321 EVANSPARK GD NW ca-ab-calgary.zip
    -#> 5  -114.1212 51.16268   204 EVANSBROOKE LD NW ca-ab-calgary.zip
    -#> 6  -114.1213 51.16264   200 EVANSBROOKE LD NW ca-ab-calgary.zip
    -#> 7  -114.1107 51.14784 219 HIDDEN VALLEY LD NW ca-ab-calgary.zip
    -#> 8  -114.1108 51.14768 211 HIDDEN VALLEY LD NW ca-ab-calgary.zip
    -#> 9  -114.1121 51.14780 364 HIDDEN VALLEY LD NW ca-ab-calgary.zip
    -#> 10 -114.1117 51.14800 348 HIDDEN VALLEY LD NW ca-ab-calgary.zip
    -#> ..       ...      ...                     ...               ...
    -
    -

    Map data

    - -

    Get some data

    -
    (out <- oa_get(dat[400]))
    -#> <Openaddresses data> ~/.openadds/us-ca-sonoma_county.zip
    -#> Dimensions [217243, 5]
    -#> 
    -#>          LON      LAT  NUMBER          STREET POSTCODE
    -#> 1  -122.5327 38.29779 3771  A       Cory Lane       NA
    -#> 2  -122.5422 38.30354   18752 White Oak Drive       NA
    -#> 3  -122.5412 38.30327   18749 White Oak Drive       NA
    -#> 4  -122.3997 38.26122    3552       Napa Road       NA
    -#> 5  -122.5425 38.30404    3998 White Oak Court       NA
    -#> 6  -122.5429 38.30434    4026 White Oak Court       NA
    -#> 7  -122.5430 38.30505    4039 White Oak Court       NA
    -#> 8  -122.5417 38.30504    4017 White Oak Court       NA
    -#> 9  -122.5409 38.30436   18702 White Oak Drive       NA
    -#> 10 -122.5403 38.30392   18684 White Oak Drive       NA
    -#> ..       ...      ...     ...             ...      ...
    -
    -

    Make an interactive map (not all data)

    -
    library("leaflet")
    -
    -x <- oa_get(oa_search(country = "us", city = "boulder")[1,]$url)
    -y <- oa_get(oa_search(country = "us", city = "gunnison")[1,]$url)
    -oa_combine(x, y) %>% 
    -  leaflet() %>%
    -  addTiles() %>%
    -  addCircles(lat = ~lat, lng = ~lon, popup = ~address)
    -
    -

    image

    - -

    To do

    - -
      -
    • Surely there are many datasets that won't work in oa_combine() - gotta go through many more.
    • -
    • An easy viz function wrapping leaflet
    • -
    • Since you can get a lot of spatial data quickly, easy way to visualize big data, maybe marker clusters?
    • -
    + [iDigBio](https://www.idigbio.org/), or _Integrated Digitized Biocollections_, collects and provides access to species occurrence data, and associated metadata (e.g., images of specimens, when provided). They collect data from [a lot of different providers](https://www.idigbio.org/portal/publishers). They have a nice web interface for searching, check out [idigbio.org/portal/search](https://www.idigbio.org/portal/search). + +`spocc` is a package we've been working on at [rOpenSci](http://ropensci.org/) for a while now - it is a one stop shop for retrieving species ocurrence data. As new sources of species occurrence data come to our attention, and are available via a RESTful API, we incorporate them into `spocc`. + +I attended last week a [hackathon put on by iDigBio](https://github.com/idigbio-api-hackathon/HackathonCentral/). One of the projects I worked on was integrating iDigBio into `spocc`. + +With the addition of iDigBio, we now have in `spocc`: + +* [GBIF](http://www.gbif.org/) +* [iNaturalist](http://www.inaturalist.org/) +* [USGS Bison](http://bison.usgs.ornl.gov/) +* [eBird](http://ebird.org/content/ebird/) +* [Ecoengine](https://ecoengine.berkeley.edu/) +* [Vertnet](http://vertnet.org/) +* [iDigBio](https://www.idigbio.org/) + +The following is a quick demo of getting iDigBio data in `spocc` + +## Install + +Get updated versions of `rgbif` and `ridigbio` first. And get `leaflet` to make an interactive map. + + +```r +devtools::install_github("ropensci/rgbif", "iDigBio/ridigbio", "rstudio/leaflet") +devtools::install_github("ropensci/spocc") +``` + + +```r +library("spocc") +``` + +## Use ridigbio - the R client for iDigBio + + +```r +library("ridigbio") +idig_search_records(rq = list(genus = "acer"), limit = 5) +#> uuid +#> 1 00041678-5df1-4a23-ba78-8c12f60af369 +#> 2 00072caf-0f24-447f-b68e-a20299f6afc7 +#> 3 000a6b9b-0bbd-46f6-82cb-848c30c46313 +#> 4 001d05e0-9c86-466d-957d-e73e2ce64fbe +#> 5 0022a2da-bc97-4bef-b2a5-b8a9944fc677 +#> occurrenceid catalognumber family +#> 1 urn:uuid:b275f928-5c0d-4832-ae82-fde363d8fde1 sapindaceae +#> 2 40428b90-27a5-11e3-8d47-005056be0003 lsu00049997 aceraceae +#> 3 02ca5aae-d8ab-492f-af10-e005b96c2295 191243 sapindaceae +#> 4 urn:catalog:cas:ds:679715 ds679715 sapindaceae +#> 5 b12bd651-2c6b-11e3-b3b8-180373cac83e 41898 sapindaceae +#> genus scientificname country stateprovince geopoint.lat +#> 1 acer acer rubrum united states illinois +#> 2 acer acer negundo united states louisiana +#> 3 acer united states new york +#> 4 acer acer circinatum united states california 41.8714 +#> 5 acer acer rubrum united states maryland 39.4197222 +#> geopoint.lon datecollected collector +#> 1 1967-06-25T00:00:00+00:00 john e. ebinger +#> 2 1991-04-19T00:00:00+00:00 alan w. lievens +#> 3 stephen f. hilfiker +#> 4 -123.8503 1930-10-27T00:00:00+00:00 carl b. wolf +#> 5 -77.1227778 1980-04-29T00:00:00+00:00 doweary, d. +``` + +## Use spocc + +### Scientific name search + +Same search as above with `ridigbio` + + +```r +occ(query = "Acer", from = "idigbio", limit = 5) +#> Searched: idigbio +#> Occurrences - Found: 379, Returned: 5 +#> Search type: Scientific +#> idigbio: Acer (5) +``` + +### Geographic search + +iDigBio uses Elasticsearch syntax to define a geographic search, but all you need to do is give a numeric vector of length 4 defining a bounding box, and you're good to go. 
+ + +```r +bounds <- c(-120, 40, -100, 45) +occ(from = "idigbio", geometry = bounds, limit = 10) +#> Searched: idigbio +#> Occurrences - Found: 346,737, Returned: 10 +#> Search type: Geometry +``` + +### W/ or W/O Coordinates + +Don't pass `has_coords` (gives data w/ and w/o coordinates data) + + +```r +occ(query = "Acer", from = "idigbio", limit = 5) +#> Searched: idigbio +#> Occurrences - Found: 379, Returned: 5 +#> Search type: Scientific +#> idigbio: Acer (5) +``` + +Only records with coordinates data + + +```r +occ(query = "Acer", from = "idigbio", limit = 5, has_coords = TRUE) +#> Searched: idigbio +#> Occurrences - Found: 16, Returned: 5 +#> Search type: Scientific +#> idigbio: Acer (5) +``` + +Only records without coordinates data + + +```r +occ(query = "Acer", from = "idigbio", limit = 5, has_coords = FALSE) +#> Searched: idigbio +#> Occurrences - Found: 363, Returned: 5 +#> Search type: Scientific +#> idigbio: Acer (5) +``` + +### Make an interactive map + + +```r +library("leaflet") +bounds <- c(-120, 40, -100, 45) +leaflet(data = dat) %>% + addTiles() %>% + addMarkers(~longitude, ~latitude, popup = ~name) %>% + addRectangles( + lng1 = bounds[1], lat1 = bounds[4], + lng2 = bounds[3], lat2 = bounds[2], + fillColor = "transparent" + ) +``` + +![image](/public/img/2015-06-08-idigbio-in-spocc/plot.png)
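Note that the leaflet() call above assumes a `dat` data.frame with `longitude`, `latitude`, and `name` columns. A minimal sketch of how it might be built from the same kind of search, combining the `geometry` and `has_coords` options shown above with `occ2df()` to flatten the result:

```r
# Sketch only: build the dat used in the map above from an occ() search.
library("spocc")
bounds <- c(-120, 40, -100, 45)
res <- occ(from = "idigbio", geometry = bounds, limit = 100, has_coords = TRUE)
dat <- occ2df(res)  # flattens to one data.frame: name, longitude, latitude, prov, ...
```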

    - - lawn - a new package to do geospatial analysis + + openadds - open addresses client

    -

lawn is an R wrapper for turf.js, a Javascript library for advanced geospatial analysis. In addition, we have a few functions to interface with the geojson-random Javascript library.

    - -

    lawn includes traditional spatial operations, helper functions for creating GeoJSON data, and data classification and statistics tools.

    - -

    There is an additional helper function (see view()) in this package to help visualize data with interactive maps via the leaflet package (https://github.com/rstudio/leaflet). Note that leaflet is not required to install lawn - it's in Suggests, not Imports or Depends.
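As a quick illustration of that Suggests-only relationship (a generic R pattern, not lawn's internals), a function can check for leaflet at run time and fail gracefully if it isn't installed:

```r
# Generic sketch of using a Suggested package at run time; not lawn's code.
# view_if_available() is a hypothetical helper, only here for illustration.
view_if_available <- function(x) {
  if (!requireNamespace("leaflet", quietly = TRUE)) {
    stop("The 'leaflet' package is needed for map views; please install it",
         call. = FALSE)
  }
  lawn::view(x)
}
```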

    - -

    Use cases for this package include (but not limited to, obs.) the following (all below assumes GeoJSON format):

    - -
      -
    • Create random spatial data.
    • -
    • Convert among spatial data types (e.g. Polygon to FeatureCollection)
    • -
    • Transform objects, including merging many, simplifying, calculating hulls, etc.
    • -
    • Measuring objects
    • -
    • Performing interpolation of objects
    • -
    • Aggregating data (aka properties) associated with objects
    • -
    - -

    Install

    - -

    Stable lawn version from CRAN - this should fetch leaflet, which is not on CRAN, but in a drat repo (let me know if it doesn't)

    -
    install.packages("lawn")
    -
    -

    Or, the development version from Github

    -
    devtools::install_github("ropensci/lawn")
    -
    library("lawn")
    -
    -

    view

    - -

lawn includes a tiny helper function for visualizing geojson. For the examples below, we'll make liberal use of the lawn::view() function to visualize what the heck it is we're doing. mkay, let's roll...

    - -

We've tried to make view() work with as many inputs as possible: character strings containing JSON, the json class from the jsonlite package, lists, and all of the GeoJSON outputs from functions in lawn.

    -
    view(lawn_data$points_average)
    -
    -

    map1
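Given the input classes listed above, the same map can come from a character string of JSON, a plain list, or the output of a lawn function - a quick sketch (the jsonlite conversion is just one way to get a list):

```r
# Same data, three of the input types view() is meant to accept (see above).
json_chr <- lawn_data$points_average                        # character JSON
view(json_chr)
view(jsonlite::fromJSON(json_chr, simplifyVector = FALSE))  # plain list
view(lawn_sample(json_chr, 2))                              # lawn output object
```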

    - -

Here, we sample two points at random from the same dataset we just viewed.

    -
    lawn_sample(lawn_data$points_average, 2) %>% view()
    -
    -

    map2

    - -

    Make some geojson data

    - -

    Point

    -
    lawn_point(c(-74.5, 40))
    -#> $type
    -#> [1] "Feature"
    -#> 
    -#> $geometry
    -#> $geometry$type
    -#> [1] "Point"
    -#> 
    -#> $geometry$coordinates
    -#> [1] -74.5  40.0
    -#> 
    -#> 
    -#> $properties
    -#> named list()
    -#> 
    -#> attr(,"class")
    -#> [1] "point"
    -
    lawn_point(c(-74.5, 40)) %>% view
    -
    -

    point

    - -

    Polygon

    -
    rings <- list(list(
    -  c(-2.275543, 53.464547),
    -  c(-2.275543, 53.489271),
    -  c(-2.215118, 53.489271),
    -  c(-2.215118, 53.464547),
    -  c(-2.275543, 53.464547)
    -))
    -lawn_polygon(rings)
    -#> $type
    -#> [1] "Feature"
    -#> 
    -#> $geometry
    -#> $geometry$type
    -#> [1] "Polygon"
    -#> 
    -#> $geometry$coordinates
    -#> , , 1
    -#> 
    -#>           [,1]      [,2]      [,3]      [,4]      [,5]
    -#> [1,] -2.275543 -2.275543 -2.215118 -2.215118 -2.275543
    -#> 
    -#> , , 2
    -#> 
    -#>          [,1]     [,2]     [,3]     [,4]     [,5]
    -#> [1,] 53.46455 53.48927 53.48927 53.46455 53.46455
    -#> 
    -#> 
    -#> 
    -#> $properties
    -#> named list()
    -#> 
    -#> attr(,"class")
    -#> [1] "polygon"
    -
    lawn_polygon(rings) %>% view
    -
    -

    polygon

    - -

    Random set of points

    -
    lawn_random(n = 2)
    -#> $type
    -#> [1] "FeatureCollection"
    -#> 
    -#> $features
    -#>      type geometry.type  geometry.coordinates
    -#> 1 Feature         Point -137.46327, -63.46154
    -#> 2 Feature         Point  -110.68426, 83.10533
    -#> 
    -#> attr(,"class")
    -#> [1] "featurecollection"
    -
    lawn_random(n = 5) %>% view
    -
    -

    rand1

    - -

    Or, use a different Javascript library (geojson-random) to create random features.

    - -

    Positions

    -
    gr_position()
    -#> [1] -179.77996   45.99018
    -
    -

    Points

    -
    gr_point(2)
    -#> $type
    -#> [1] "FeatureCollection"
    -#> 
    -#> $features
    -#>      type geometry.type geometry.coordinates
    -#> 1 Feature         Point   5.83895, -27.77218
    -#> 2 Feature         Point   78.50177, 14.95840
    -#> 
    -#> attr(,"class")
    -#> [1] "featurecollection"
    -
    gr_point(2) %>% view
    -
    -

    rand2

    - -

    Polygons

    -
    gr_polygon(n = 1, vertices = 5, max_radial_length = 5)
    -#> $type
    -#> [1] "FeatureCollection"
    -#> 
    -#> $features
    -#>      type geometry.type
    -#> 1 Feature       Polygon
    -#>                                                                                                           geometry.coordinates
    -#> 1 67.58827, 67.68551, 67.00091, 66.70156, 65.72578, 67.58827, -42.11340, -42.69850, -43.54866, -42.42758, -41.76731, -42.11340
    -#> 
    -#> attr(,"class")
    -#> [1] "featurecollection"
    -
    gr_polygon(n = 1, vertices = 5, max_radial_length = 5) %>% view
    -
    -

    rand3

    - -

    count

    - -

Count the number of points within polygons - this appends a new field to properties (see the pt_count field in the output below)

    -
    lawn_count(polygons = lawn_data$polygons_count, points = lawn_data$points_count)
    -#> $type
    -#> [1] "FeatureCollection"
    -#> 
    -#> $features
    -#>      type pt_count geometry.type
    -#> 1 Feature        2       Polygon
    -#> 2 Feature        0       Polygon
    -#>                                                                                           geometry.coordinates
    -#> 1 -112.07239, -112.07239, -112.02810, -112.02810, -112.07239, 46.58659, 46.61761, 46.61761, 46.58659, 46.58659
    -#> 2 -112.02398, -112.02398, -111.96613, -111.96613, -112.02398, 46.57043, 46.61502, 46.61502, 46.57043, 46.57043
    -#> 
    -#> attr(,"class")
    -#> [1] "featurecollection"
    -
    -

    distance

    - -

    Define two points

    -
    from <- '{
    - "type": "Feature",
    - "properties": {},
    - "geometry": {
    -   "type": "Point",
    -   "coordinates": [-75.343, 39.984]
    - }
    -}'
    -to <- '{
    -  "type": "Feature",
    -  "properties": {},
    -  "geometry": {
    -    "type": "Point",
    -    "coordinates": [-75.534, 39.123]
    -  }
    -}'
    -
    -

Calculate the distance between them; the default unit is kilometers (km)

    -
    lawn_distance(from, to)
    -#> [1] 97.15958
    -
    -
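If kilometers aren't what you want, this sketch assumes lawn_distance() exposes a units argument with turf.js unit names, mirroring the units arguments elsewhere in the package (e.g. lawn_buffer() below):

```r
# Assumes a units argument with turf.js unit names ("miles", "kilometers", ...).
lawn_distance(from, to, units = "miles")
```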

    sample from a FeatureCollection

    -
    dat <- lawn_data$points_average
    -cat(dat)
    -#> {
    -#>   "type": "FeatureCollection",
    -#>   "features": [
    -#>     {
    -#>       "type": "Feature",
    -#>       "properties": {
    -#>         "population": 200
    -#>       },
    -#>       "geometry": {
    -#>         "type": "Point",
    -...
    -
    -

    Sample 2 points at random

    -
    lawn_sample(dat, 2)
    -#> $type
    -#> [1] "FeatureCollection"
    -#> 
    -#> $features
    -#>      type population geometry.type geometry.coordinates
    -#> 1 Feature        200         Point   10.80643, 59.90891
    -#> 2 Feature        600         Point   10.71579, 59.90478
    -#> 
    -#> attr(,"class")
    -#> [1] "featurecollection"
    -
    -

    extent

    - -

    Calculates the extent of all input features in a FeatureCollection, and returns a bounding box.

    -
    lawn_extent(lawn_data$points_average)
    -#> [1] 10.71579 59.90478 10.80643 59.93162
    -
    -

    buffer

    - -

    Calculates a buffer for input features for a given radius.

    -
    dat <- '{
    - "type": "Feature",
    - "properties": {},
    - "geometry": {
    -     "type": "Polygon",
    -     "coordinates": [[
    -       [-112.072391,46.586591],
    -       [-112.072391,46.61761],
    -       [-112.028102,46.61761],
    -       [-112.028102,46.586591],
    -       [-112.072391,46.586591]
    -     ]]
    -   }
    -}'
    -view(dat)
    -
    -

    buffer1

    -
    lawn_buffer(dat, 1, "miles") %>% view
    -
    -

    buffer2

    - -

    Union polygons together

    -
    poly1 <- '{
    - "type": "Feature",
    - "properties": {
    -   "fill": "#0f0"
    - },
    - "geometry": {
    -   "type": "Polygon",
    -   "coordinates": [[
    -     [-122.801742, 45.48565],
    -     [-122.801742, 45.60491],
    -     [-122.584762, 45.60491],
    -     [-122.584762, 45.48565],
    -     [-122.801742, 45.48565]
    -    ]]
    - }
    -}'
    -
    -poly2 <- '{
    - "type": "Feature",
    - "properties": {
    -   "fill": "#00f"
    - },
    - "geometry": {
    -   "type": "Polygon",
    -   "coordinates": [[
    -     [-122.520217, 45.535693],
    -     [-122.64038, 45.553967],
    -     [-122.720031, 45.526554],
    -     [-122.669906, 45.507309],
    -     [-122.723464, 45.446643],
    -     [-122.532577, 45.408574],
    -     [-122.487258, 45.477466],
    -     [-122.520217, 45.535693]
    -     ]]
    - }
    -}'
    -view(poly1)
    -
    -

    union1

    -
    view(poly2)
    -
    -

    union2

    - -

    Visualize union-ed polygons

    -
    lawn_union(poly1, poly2) %>% view
    -
    -

    union3

    - -

    See also lawn_merge() and lawn_intersect().

    - -

    lint input geojson

    - -

For most functions, you can lint your input geojson data to make sure it is proper geojson. We use the javascript library geojsonhint. See the lint parameter.

    - -

    Good GeoJSON

    -
    dat <- '{
    -  "type": "FeatureCollection",
    -  "features": [
    -    {
    -      "type": "Feature",
    -      "properties": {
    -        "population": 200
    -      },
    -      "geometry": {
    -        "type": "Point",
    -        "coordinates": [10.724029, 59.926807]
    -      }
    -    }
    -  ]
    -}'
    -lawn_extent(dat)
    -#> [1] 10.72403 59.92681 10.72403 59.92681
    -
    -

    Bad GeoJSON

    -
    dat <- '{
    -  "type": "FeatureCollection",
    -  "features": [
    -    {
    -      "type": "Feature",
    -      "properties": {
    -        "population": 200
    -      },
    -      "geometry": {
    -        "type": "Point"
    -      }
    -    }
    -  ]
    -}'
    -lawn_extent(dat, lint = TRUE)
    -
    -#> Error: Line 1 - "coordinates" property required
    -
    -

    To do

    - -
      -
    • As Turf.js changes, we'll update lawn
    • -
• Performance improvements. We realize that this package is slower than the C-based rgdal/rgeos - we are looking into ways to increase performance to get closer to those packages.
    • -
    + `openadds` talks to [Openaddresses.io](http://openaddresses.io/). a run down of its things: + +## Install + + +```r +devtools::install_github("sckott/openadds") +``` + + +```r +library("openadds") +``` + +## List datasets + +Scrapes links to datasets from the openaddresses site + + +```r +dat <- oa_list() +dat[2:6] +#> [1] "http://data.openaddresses.io.s3.amazonaws.com/20150511/au-tas-launceston.csv" +#> [2] "http://s3.amazonaws.com/data.openaddresses.io/20141127/au-victoria.zip" +#> [3] "http://data.openaddresses.io.s3.amazonaws.com/20150511/be-flanders.zip" +#> [4] "http://data.openaddresses.io.s3.amazonaws.com/20150417/ca-ab-calgary.zip" +#> [5] "http://data.openaddresses.io.s3.amazonaws.com/20150511/ca-ab-grande_prairie.zip" +``` + +## Search for datasets + +Uses `oa_list()` internally, then searches through columns requested. + + +```r +oa_search(country = "us", state = "ca") +#> Source: local data frame [68 x 5] +#> +#> country state city ext +#> 1 us ca san_mateo_county .zip +#> 2 us ca alameda_county .zip +#> 3 us ca alameda_county .zip +#> 4 us ca amador .zip +#> 5 us ca amador .zip +#> 6 us ca bakersfield .zip +#> 7 us ca bakersfield .zip +#> 8 us ca berkeley .zip +#> 9 us ca berkeley .zip +#> 10 us ca butte_county .zip +#> .. ... ... ... ... +#> Variables not shown: url (chr) +``` + +## Get data + +Passing in a URL + + +```r +(out1 <- oa_get(dat[5])) +#> ~/.openadds/ca-ab-calgary.zip +#> Dimensions [350962, 13] +#> +#> OBJECTID ADDRESS_TY ADDRESS STREET_NAM STREET_TYP +#> 0 757023 Parcel 249 SAGE MEADOWS CI NW SAGE MEADOWS CI +#> 1 757022 Parcel 2506 17 ST SE 17 ST +#> 2 757021 Parcel 305 EVANSPARK GD NW EVANSPARK GD +#> 3 757020 Parcel 321 EVANSPARK GD NW EVANSPARK GD +#> 4 757019 Parcel 204 EVANSBROOKE LD NW EVANSBROOKE LD +#> 5 757018 Parcel 200 EVANSBROOKE LD NW EVANSBROOKE LD +#> 6 757017 Parcel 219 HIDDEN VALLEY LD NW HIDDEN VALLEY LD +#> 7 757016 Parcel 211 HIDDEN VALLEY LD NW HIDDEN VALLEY LD +#> 8 757015 Parcel 364 HIDDEN VALLEY LD NW HIDDEN VALLEY LD +#> 9 757014 Parcel 348 HIDDEN VALLEY LD NW HIDDEN VALLEY LD +#> .. ... ... ... ... ... +#> Variables not shown: STREET_QUA (fctr), HOUSE_NUMB (int), HOUSE_ALPH +#> (fctr), SUITE_NUMB (int), SUITE_ALPH (fctr), LONGITUDE (dbl), +#> LATITUDE (dbl), COMM_NAME (fctr) +``` + +First getting URL for dataset through `as_openadd()`, then passing to `oa_get()` + + +```r +(x <- as_openadd("us", "nm", "hidalgo")) +#> <> +#> <> us +#> <> nm +#> <> hidalgo +#> <> .csv +``` + + +```r +oa_get(x) +#> ~/.openadds/us-nm-hidalgo.csv +#> Dimensions [170659, 37] +#> +#> OBJECTID Shape ADD_NUM ADD_SUF PRE_MOD PRE_DIR PRE_TYPE ST_NAME +#> 1 1 NA 422 S 2ND +#> 2 2 NA 1413 S 4TH +#> 3 3 NA 412 E CHAMPION +#> 4 4 NA 110 E SAMANO +#> 5 5 NA 2608 W FREDDY GONZALEZ +#> 6 6 NA 2604 W FREDDY GONZALEZ +#> 7 7 NA 1123 W FAY +#> 8 8 NA 417 S 2ND +#> 9 9 NA 4551 E TEXAS +#> 10 10 NA 810 DRIFTWOOD +#> .. ... ... ... ... ... ... ... ... +#> Variables not shown: ST_TYPE (chr), POS_DIR (chr), POS_MOD (chr), ESN +#> (int), MSAG_COMM (chr), PARCEL_ID (chr), PLACE_TYPE (chr), LANDMARK +#> (chr), BUILDING (chr), UNIT (chr), ROOM (chr), FLOOR (int), LOC_NOTES +#> (chr), ST_ALIAS (chr), FULL_ADDR (chr), ZIP (chr), POSTAL_COM (chr), +#> MUNICIPAL (chr), COUNTY (chr), STATE (chr), SOURCE (chr), REGION +#> (chr), EXCH (chr), LAT (dbl), LONG (dbl), PICTURE (chr), OA:x (dbl), +#> OA:y (dbl), OA:geom (chr) +``` + +## Combine multiple datasets + +`combine` attemps to guess lat/long and address columns, but definitely more work to do to make +this work for most cases. 
Lat/long and address columns vary among every dataset - some datasets +have no lat/long data, some have no address data. + + +```r +out2 <- oa_get(dat[32]) +(alldat <- oa_combine(out1, out2)) +#> Source: local data frame [418,623 x 4] +#> +#> lon lat address dataset +#> 1 -114.1303 51.17188 249 SAGE MEADOWS CI NW ca-ab-calgary.zip +#> 2 -114.0190 51.03168 2506 17 ST SE ca-ab-calgary.zip +#> 3 -114.1175 51.17497 305 EVANSPARK GD NW ca-ab-calgary.zip +#> 4 -114.1175 51.17461 321 EVANSPARK GD NW ca-ab-calgary.zip +#> 5 -114.1212 51.16268 204 EVANSBROOKE LD NW ca-ab-calgary.zip +#> 6 -114.1213 51.16264 200 EVANSBROOKE LD NW ca-ab-calgary.zip +#> 7 -114.1107 51.14784 219 HIDDEN VALLEY LD NW ca-ab-calgary.zip +#> 8 -114.1108 51.14768 211 HIDDEN VALLEY LD NW ca-ab-calgary.zip +#> 9 -114.1121 51.14780 364 HIDDEN VALLEY LD NW ca-ab-calgary.zip +#> 10 -114.1117 51.14800 348 HIDDEN VALLEY LD NW ca-ab-calgary.zip +#> .. ... ... ... ... +``` + +## Map data + +Get some data + + +```r +(out <- oa_get(dat[400])) +#> ~/.openadds/us-ca-sonoma_county.zip +#> Dimensions [217243, 5] +#> +#> LON LAT NUMBER STREET POSTCODE +#> 1 -122.5327 38.29779 3771 A Cory Lane NA +#> 2 -122.5422 38.30354 18752 White Oak Drive NA +#> 3 -122.5412 38.30327 18749 White Oak Drive NA +#> 4 -122.3997 38.26122 3552 Napa Road NA +#> 5 -122.5425 38.30404 3998 White Oak Court NA +#> 6 -122.5429 38.30434 4026 White Oak Court NA +#> 7 -122.5430 38.30505 4039 White Oak Court NA +#> 8 -122.5417 38.30504 4017 White Oak Court NA +#> 9 -122.5409 38.30436 18702 White Oak Drive NA +#> 10 -122.5403 38.30392 18684 White Oak Drive NA +#> .. ... ... ... ... ... +``` + +Make an interactive map (not all data) + + +```r +library("leaflet") + +x <- oa_get(oa_search(country = "us", city = "boulder")[1,]$url) +y <- oa_get(oa_search(country = "us", city = "gunnison")[1,]$url) +oa_combine(x, y) %>% + leaflet() %>% + addTiles() %>% + addCircles(lat = ~lat, lng = ~lon, popup = ~address) +``` + +![image](/public/img/2015-05-18-openadds/map.png) + +## To do + +* Surely there are many datasets that won't work in `oa_combine()` - gotta go through many more. +* An easy viz function wrapping `leaflet` +* Since you can get a lot of spatial data quickly, easy way to visualize big data, maybe marker clusters?
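On the "easy viz function wrapping leaflet" to-do item above, it might look roughly like this - oa_map() is hypothetical, just a sketch of the idea:

```r
# Hypothetical helper, not part of openadds: map oa_combine() output with
# leaflet, sampling rows so very large datasets stay responsive.
library("leaflet")
oa_map <- function(..., max_points = 10000) {
  dat <- oa_combine(...)
  if (nrow(dat) > max_points) {
    dat <- dat[sample.int(nrow(dat), max_points), ]
  }
  leaflet(data = dat) %>%
    addTiles() %>%
    addCircles(lat = ~lat, lng = ~lon, popup = ~address)
}
```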
    diff --git a/_site/page40/index.html b/_site/page40/index.html index 785d98f442..1fb3c3c89e 100644 --- a/_site/page40/index.html +++ b/_site/page40/index.html @@ -61,40 +61,40 @@

    Recology

    - - Short on funding? Can't get a grant? Crowdfunding! #SciFund + + R tutorial on visualizations/graphics

    - + -

Crowdsourced funding is becoming a sustainable way for various artists, entrepreneurs, etc. to get their ideas funded by individuals. For example, think of Kickstarter and RocketHub.


Jai Ranganathan and Jarrett Byrnes have started an experiment to determine how well crowdfunding can work for scientists: The SciFund Challenge. Go here to sign up and here for their website.


    The deadline to sign up is Oct. 1

    + Rolf Lohaus, a Huxley postdoctoral fellow here in the EEB dept at Rice University, gave our R course a talk on basic visualizations in R this morning.

    Enjoy!


    - - Ten Simple Rules for OA Publishers talk by Philip Bourne + + Short on funding? Can't get a grant? Crowdfunding! #SciFund

- + - + Crowdsourced funding is becoming a sustainable way for various artists, entrepreneurs, etc. to get their ideas funded by individuals.


Jai Ranganathan and Jarrett Byrnes have started an experiment to determine how well crowdfunding can work for scientists: The SciFund Challenge. Go here to sign up and here for their website.


    The deadline to sign up is Oct. 1

    - - @drewconway interview on @DataNoBorders at the Strata conference + + Ten Simple Rules for OA Publishers talk by Philip Bourne

    - + -

    The O'Reilly Media Strata Summit has many interviews on YouTube (just search YouTube for it)

Drew Conway is the author of several R packages, including infochimps, an R wrapper for the Infochimps API service.

    The YouTube video:


    +
    diff --git a/_site/page41/index.html b/_site/page41/index.html index e6729389ff..3047ac4c8e 100644 --- a/_site/page41/index.html +++ b/_site/page41/index.html @@ -61,44 +61,47 @@

    Recology

    - - Open science talk by Carl Boettiger + + @drewconway interview on @DataNoBorders at the Strata conference

    - - -

    Carl Boettiger gave a talk on the topic of open science to incoming UC Davis graduate students.

    - -

    Here is the audio click here

    + -

Here are the slides click here

    + The O'Reilly Media Strata Summit has many interviews on YouTube (just search YouTube for it)

Drew Conway is the author of several R packages, including infochimps, an R wrapper for the Infochimps API service.

    The YouTube video:


    - - My take on an R introduction talk + + Open science talk by Carl Boettiger

- + + + Carl Boettiger gave a talk on the topic of open science to incoming UC Davis graduate students. + +Here is the audio [click here][here] + +Here are the slides [click here][here2] -

    UPDATE: I put in an R tutorial as a Github gist below.


    Here is a short intro R talk I gave today...for what it's worth...



    +[here]: http://www.archive.org/details/ThingsIWishIKnewThreeYearsAgo-ByTheDavisOpenScienceGroup&reCache=1 +[here2]: http://hazelnusse.github.com/DOS_WOW2011/#title-slide

    - - A Data Visualization Book + + My take on an R introduction talk

    - + -

    Note: thanks to Scott for inviting me to contribute to the Recology blog despite being an ecology outsider; my work is primarily in atomic physics. -Pascal

    A part of me has always liked thinking about how to effectively present information, but until the past year, I had not read much to support my (idle) interest in information visualization. That changed in the spring when I read Edward Tufte's The Visual Display of Quantitative Information, a book that stimulated me to think more deeply about presenting information. I originally started with a specific task in mind--a wonderful tool for focusing one's interests--but quickly found that Tufte's book was less a practical guide and more a list of general design principles. Then, a few months ago, I stumbled upon Nathan Yau's blog, FlowingData, and found out he was writing a practical guide to design and visualization. Conveniently enough for me, Yau's book, Visualize This, would be released within a month of my discovery of his blog; what follows are my impressions of Visualize This.

    I have liked Visualize This a lot.  Yau writes with much the same informal tone as on his blog, and the layout is visually pleasing (good thing, too, for a book about visualizing information!).  The first few chapters are pretty basic if you have done much data manipulation before, but it is really nice to have something laid out so concisely.  The examples are good, too, in that he is very explicit about every step: there is no intuiting what that missing step should be.  The author even acknowledges in the introduction that the first part of the book is at an introductory level.

    Early in the book, Yau discusses where to obtain data. This compilation of sources is potentially a useful reference for someone, like me, who almost always generates his own data in the lab. Unfortunately, Yau does not talk much about preparation of (or best practices for) your own data.  Additionally, from the perspective of a practicing scientist, it would have been nice to hear about how to archive data to make sure it is readable far into the future, but that is probably outside the scope of the book.

    Yau seems really big into using open source software for getting and analyzing data (e.g. Python, R, etc…), but he is surprisingly attached to the proprietary Adobe Illustrator for turning figures into presentation quality graphics.  He says that he feels like the default options in most analysis programs do not make for very good quality graphics (and he is right), but he does not really acknowledge that you can generate nice output if you go beyond the default settings.  For me, the primary advantage of generating output programmatically is that it is easy to regenerate when you need to change the data or the formatting on the plot.  Using a graphical user interface, like in Adobe Illustrator, is nice if you are only doing something once (how often does that happen?), but when you have to regenerate the darn figure fifty times to satisfy your advisor, it gets tedious to move things around pixel by pixel.

    By the time I reached the middle chapters, I started finding many of the details to be repetitive. Part of this repetition stems from the fact that Yau divides these chapters by the type of visualization. For example, "Visualizing Proportions" and "Visualizing Relationships" are two of the chapter titles. While I think these distinctions are important ones for telling the right story about one's data, creating figures for the different data types often boils down to choosing different functions in R or Python. People with less analysis and presentation experience should find the repetition helpful, but I increasingly skimmed these sections as I went along.  

    Working through Yau's examples for steps you do not already know would probably be the most useful way of getting something out of the book.  So, for example, I started trying to use Python to scrape data from a webpage, something I had not previously done.  I followed the book's example of this data-scraping just fine, but as with most things in programming, you find all sorts of minor hurdles to clear when you try your own thing. In my case, I am re-learning the Python I briefly learned about 10 years ago--partly in anticipation of not having access to Matlab licenses once I vacate the academy--since I have forgotten a lot of the syntax.  A lot of this stuff would be faster if I were working in Matlab which I grew more familiar with in graduate school.

    Overall, Visualize This is a really nice looking book and will continue to be useful to me as a reference. Yau concludes his book with a refreshing reminder to provide context for the data we present. This advice is particularly relevant when presenting to a wider or lay audience, but it is still important for us, as scientists, to clearly communicate our findings in the literature. Patterns in the data are not often self-evident, and therefore we should think carefully about which visualization tools will best convey the meaning of our results.

    Edited to add a link to Visualize This here and in the introductory paragraph.

    + UPDATE: I put in an R tutorial as a Github gist below.


    Here is a short intro R talk I gave today...for what it's worth...



    diff --git a/_site/page42/index.html b/_site/page42/index.html index c9ee53ec61..f838789fcd 100644 --- a/_site/page42/index.html +++ b/_site/page42/index.html @@ -61,40 +61,40 @@

    Recology

    - - FigShare Talk + + A Data Visualization Book

    - + -

FigShare - I very much like this idea of a place to put your data online that is NOT published. Dryad is a nice place for datasets linked with published papers, but there isn't really a place for datasets that perhaps did not make the cut for a published paper, and if known to the scientific community, could potentially help resolve the "file-drawer" effect in meta-analyses. (wow, run on sentence)

     
    "Figshare - Why don't you publish all your research?" Mark Hahnel Imperial College London from London Biogeeks on Vimeo.

    + Note: thanks to Scott for inviting me to contribute to the Recology blog despite being an ecology outsider; my work is primarily in atomic physics. -Pascal

    A part of me has always liked thinking about how to effectively present information, but until the past year, I had not read much to support my (idle) interest in information visualization. That changed in the spring when I read Edward Tufte's The Visual Display of Quantitative Information, a book that stimulated me to think more deeply about presenting information. I originally started with a specific task in mind--a wonderful tool for focusing one's interests--but quickly found that Tufte's book was less a practical guide and more a list of general design principles. Then, a few months ago, I stumbled upon Nathan Yau's blog, FlowingData, and found out he was writing a practical guide to design and visualization. Conveniently enough for me, Yau's book, Visualize This, would be released within a month of my discovery of his blog; what follows are my impressions of Visualize This.

    I have liked Visualize This a lot.  Yau writes with much the same informal tone as on his blog, and the layout is visually pleasing (good thing, too, for a book about visualizing information!).  The first few chapters are pretty basic if you have done much data manipulation before, but it is really nice to have something laid out so concisely.  The examples are good, too, in that he is very explicit about every step: there is no intuiting what that missing step should be.  The author even acknowledges in the introduction that the first part of the book is at an introductory level.

    Early in the book, Yau discusses where to obtain data. This compilation of sources is potentially a useful reference for someone, like me, who almost always generates his own data in the lab. Unfortunately, Yau does not talk much about preparation of (or best practices for) your own data.  Additionally, from the perspective of a practicing scientist, it would have been nice to hear about how to archive data to make sure it is readable far into the future, but that is probably outside the scope of the book.

    Yau seems really big into using open source software for getting and analyzing data (e.g. Python, R, etc…), but he is surprisingly attached to the proprietary Adobe Illustrator for turning figures into presentation quality graphics.  He says that he feels like the default options in most analysis programs do not make for very good quality graphics (and he is right), but he does not really acknowledge that you can generate nice output if you go beyond the default settings.  For me, the primary advantage of generating output programmatically is that it is easy to regenerate when you need to change the data or the formatting on the plot.  Using a graphical user interface, like in Adobe Illustrator, is nice if you are only doing something once (how often does that happen?), but when you have to regenerate the darn figure fifty times to satisfy your advisor, it gets tedious to move things around pixel by pixel.

    By the time I reached the middle chapters, I started finding many of the details to be repetitive. Part of this repetition stems from the fact that Yau divides these chapters by the type of visualization. For example, "Visualizing Proportions" and "Visualizing Relationships" are two of the chapter titles. While I think these distinctions are important ones for telling the right story about one's data, creating figures for the different data types often boils down to choosing different functions in R or Python. People with less analysis and presentation experience should find the repetition helpful, but I increasingly skimmed these sections as I went along.  

    Working through Yau's examples for steps you do not already know would probably be the most useful way of getting something out of the book.  So, for example, I started trying to use Python to scrape data from a webpage, something I had not previously done.  I followed the book's example of this data-scraping just fine, but as with most things in programming, you find all sorts of minor hurdles to clear when you try your own thing. In my case, I am re-learning the Python I briefly learned about 10 years ago--partly in anticipation of not having access to Matlab licenses once I vacate the academy--since I have forgotten a lot of the syntax.  A lot of this stuff would be faster if I were working in Matlab which I grew more familiar with in graduate school.

    Overall, Visualize This is a really nice looking book and will continue to be useful to me as a reference. Yau concludes his book with a refreshing reminder to provide context for the data we present. This advice is particularly relevant when presenting to a wider or lay audience, but it is still important for us, as scientists, to clearly communicate our findings in the literature. Patterns in the data are not often self-evident, and therefore we should think carefully about which visualization tools will best convey the meaning of our results.

    Edited to add a link to Visualize This here and in the introductory paragraph.

    - - Jonathan Eisen on the Fourth Domain and Open Science + + FigShare Talk

    - + -




    Stalking the Fourth Domain with Jonathan Eisen, Ph D from mendelspod on Vimeo.

+ FigShare - I very much like this idea of a place to put your data online that is NOT published. Dryad is a nice place for datasets linked with published papers, but there isn't really a place for datasets that perhaps did not make the cut for a published paper, and if known to the scientific community, could potentially help resolve the "file-drawer" effect in meta-analyses. (wow, run on sentence)

     
    "Figshare - Why don't you publish all your research?" Mark Hahnel Imperial College London from London Biogeeks on Vimeo.

    - - rnpn: An R interface for the National Phenology Network + + Jonathan Eisen on the Fourth Domain and Open Science

    - + -

    The team at rOpenSci and I have been working on a wrapper for the USA National Phenology Network API. The following is a demo of some of the current possibilities. We will have more functions down the road. Get the publicly available code, and contribute, at Github here. If you try this out look at the Description file for the required R packages to run rnpn. Let us know at Github (here) or at our website  http://ropensci.org/, or in the comments below, or on twitter (@rOpenSci), what use cases you would like to see with the rnpn package.

    Method and demo of each:
    Get observations for species by day
    From the documentation: "This function will return a list of species, containing all the dates which observations were made about the species, and a count of the number of such observations made on that date."

    #### Note, the data below is truncated for blogging brevity...

    > getobsspbyday(c(1, 2), '2008-01-01', '2011-12-31') # Searched for species 1 and 2 from Jan 1, 2008 to Dec 31, 2011
    date count species
    1 2009-03-08 2 species 1
    2 2009-03-15 1 species 1
    3 2009-03-22 1 species 1
    4 2009-03-24 1 species 1
    5 2009-03-26 1 species 1
    6 2009-04-17 1 species 1
    7 2009-04-24 1 species 1
    8 2009-05-12 1 species 1
    9 2009-05-20 1 species 1
    10 2009-11-24 1 species 1
    11 2009-12-07 1 species 1
    12 2010-01-18 1 species 1
    13 2010-01-23 1 species 1
    62 2011-05-29 1 species 1
    63 2011-06-27 1 species 1
    64 2011-06-30 2 species 1
    65 2009-03-17 1 species 2
    66 2009-04-03 3 species 2
    67 2009-04-05 3 species 2
    68 2009-04-10 3 species 2
    69 2009-04-17 3 species 2






    Get individuals at specific stations
    From the documentation: "This function returns all of the individuals at a series of stations."

    > getindsatstations(c(507, 523)) # Searched for any individuals at stations 507 and 523
    individualid individualname speciesid kingdom
    1 1200 dogwood 12 Plantae
    2 1197 purple lilac 36 Plantae
    3 1193 white t 38 Plantae
    4 3569 forsythia-1 73 Plantae
    5 1206 jack 150 Plantae
    6 1199 trout lily 161 Plantae
    7 1198 dandy 189 Plantae
    8 1192 red t 192 Plantae
    9 1710 common lilac 36 Plantae
    10 1711 common lilac 2 36 Plantae
    11 1712 dandelion 189 Plantae







    Get individuals of species at stations
    From the documentation: "This function will return a list of all the individuals, which are members of a species, among  any number of stations."

    > getindspatstations(35, c(60, 259), 2009)  # Search for individuals of species 35 at stations 60 and 259 in year 2009
    individual
    id individualname numberobservations
    1 1715 west 5
    2 1716 east 5









    Get observation associated with particular observation
    From the documentation: "This function will return the comment associated with a particular observation."

    > getobscomm(1938) # The observation for observation number 1938
    $observation_comment
    [1] "some lower branches are bare"

    +


    Stalking the Fourth Domain with Jonathan Eisen, Ph D from mendelspod on Vimeo.
    diff --git a/_site/page43/index.html b/_site/page43/index.html index fdbe7f7e34..59edbb5c7a 100644 --- a/_site/page43/index.html +++ b/_site/page43/index.html @@ -61,46 +61,40 @@

    Recology

    - - Tenure track position in systematics at the University of Vermont + + rnpn: An R interface for the National Phenology Network

    - + -


There is an awesome position opening up for an assistant professor in systematics at the University of Vermont. Below is the announcement, and see the original post at the Distributed Ecology blog. Why is this related to R? One can do a lot of systematics work in R, including retrieving scientific collections data through an upcoming package handshaking with VertNet (part of the rOpenSci project), managing large data sets, retrieval of GenBank data through the ape package (see the function read.GenBank), phylogenetic reconstruction and analysis, and more. So a systematist with R ninja skills will surely have a leg up on the rest of the field.



    Assistant Professor in Systematics


    Department of Biology
    University of Vermont
    Burlington, Vermont

    The Department of Biology of the University of Vermont seeks applications for a tenure- track Assistant Professor position in Systematics and Evolutionary Biology of arthropods, especially insects. The position will be open in the fall of 2012. The successful candidate will have expertise in classical and molecular systematics, including analysis of complex data sets. Candidates pursuing phylogenomics and innovative methods in bioinformatics in combination with taxonomy are especially encouraged to apply. Department information at: http://www.uvm.edu/~biology/.


    All applicants are expected to: 1) hold a Ph.D. degree in relevant disciplines and have two or more years of postdoctoral experience; 2) develop a competitively funded research program; 3) teach undergraduate courses (chosen from among general biology, evolution, systematic entomology, and others in the candidate's expertise); 4) teach, mentor and advise undergraduate and graduate students; and 5) oversee a natural history collection of historic significance.

    Candidates must apply online: http://www.uvmjobs.com/. On left see "Search Postings" then find "Biology" under "HCM Department" then posting 0040090 (first posting). Sorry, but we cannot supply the direct link because it will time out.

    Attach a cover letter with a statement of research focus and teaching interests (one document), a curriculum vitae, representative publications, and the contact information of three references.

    Review of applications will begin on September 15, 2011, and will continue until the position is filled. Questions and up to three additional publications may be directed to Dr. Jos. J. Schall: jschall@uvm.edu.

The University of Vermont recently identified several "Spires of Excellence" in which it will strategically focus institutional investments and growth over the next several years. One spire associated with the position is Complex Systems. Candidates whose research interests align with this spire are especially encouraged to apply: http://www.uvm.edu/~tri/.
    The University seeks faculty who can contribute to the diversity and excellence of the academic community through their research, teaching, and/or service. Applicants are requested to include in their cover letter information about how they will further this goal. The University of Vermont is an Affirmative Action/Equal Opportunity employer. The Department is committed to increasing faculty diversity and welcomes applications from women, underrepresented ethnic, racial and cultural groups, and from people with disabilities.

    + The team at rOpenSci and I have been working on a wrapper for the USA National Phenology Network API. The following is a demo of some of the current possibilities. We will have more functions down the road. Get the publicly available code, and contribute, at Github here. If you try this out look at the Description file for the required R packages to run rnpn. Let us know at Github (here) or at our website  http://ropensci.org/, or in the comments below, or on twitter (@rOpenSci), what use cases you would like to see with the rnpn package.

    Method and demo of each:
    Get observations for species by day
    From the documentation: "This function will return a list of species, containing all the dates which observations were made about the species, and a count of the number of such observations made on that date."

    #### Note, the data below is truncated for blogging brevity...
    > getobsspbyday(c(1, 2), '2008-01-01', '2011-12-31') # Searched for species 1 and 2 from Jan 1, 2008 to Dec 31, 2011
    date count species
    1 2009-03-08 2 species 1
    2 2009-03-15 1 species 1
    3 2009-03-22 1 species 1
    4 2009-03-24 1 species 1
    5 2009-03-26 1 species 1
    6 2009-04-17 1 species 1
    7 2009-04-24 1 species 1
    8 2009-05-12 1 species 1
    9 2009-05-20 1 species 1
    10 2009-11-24 1 species 1
    11 2009-12-07 1 species 1
    12 2010-01-18 1 species 1
    13 2010-01-23 1 species 1
    62 2011-05-29 1 species 1
    63 2011-06-27 1 species 1
    64 2011-06-30 2 species 1
    65 2009-03-17 1 species 2
    66 2009-04-03 3 species 2
    67 2009-04-05 3 species 2
    68 2009-04-10 3 species 2
    69 2009-04-17 3 species 2






    Get individuals at specific stations
    From the documentation: "This function returns all of the individuals at a series of stations."

    > getindsatstations(c(507, 523)) # Searched for any individuals at stations 507 and 523
    individual_id individual_name species_id kingdom
    1 1200 dogwood 12 Plantae
    2 1197 purple lilac 36 Plantae
    3 1193 white t 38 Plantae
    4 3569 forsythia-1 73 Plantae
    5 1206 jack 150 Plantae
    6 1199 trout lily 161 Plantae
    7 1198 dandy 189 Plantae
    8 1192 red t 192 Plantae
    9 1710 common lilac 36 Plantae
    10 1711 common lilac 2 36 Plantae
    11 1712 dandelion 189 Plantae







    Get individuals of species at stations
    From the documentation: "This function will return a list of all the individuals, which are members of a species, among  any number of stations."

    > getindspatstations(35, c(60, 259), 2009)  # Search for individuals of species 35 at stations 60 and 259 in year 2009
    individual_id individual_name number_observations
    1 1715 west 5
    2 1716 east 5









    Get the comment associated with a particular observation
    From the documentation: "This function will return the comment associated with a particular observation."

    > getobscomm(1938) # The comment for observation number 1938
    $observation_comment
    [1] "some lower branches are bare"

    - - Thursday at #ESA11 + + Tenure track position in systematics at the University of Vermont

    - + -

    Interesting talks/posters:


    Richard Lankau presented research on trade-offs and competitive ability. He suggests that during range expansion, selection for increased intraspecific competitive ability in older populations leads to loss of interspecific competitive ability, due to trade-offs between the two sets of traits.


    Ellner emphatically states that rapid evolution DOES matter for ecological responses, and longer-term evolutionary patterns as well. [His paper on the talk he was giving came out prior to his talk, which he pointed out, good form sir]


    Lauren Sullivan gave an interesting talk on bottom up and top down effects on plant reproduction in one site of a huge network of sites doing similar nutrient and herbivory manipulations around the globe - NutNet (go here: http://nutnet.science.oregonstate.edu/).


    Laura Prugh shows in California that the engineering effects (i.e., the mounds that they make) of giant kangaroo rats are more important for the associated food web than the species interaction effects (the proxy used was just density of rats).


    Kristy Deiner suggests that chironomids are more phylogenetically similar in lakes with stocked fish than in fishless lakes, based on high elevation lakes in the Sierra Nevada. She used barcode data to generate her phylogeny of chironomids. If you have barcode data and want to search the BOLD Systems site, one option is doing it from R using rbold, a package under development at rOpenSci (code at Github).


    Jessica Gurevitch presented a large working group's methods/approach to a set of reviews on invasion biology. We didn't get to see a lot of results from this work, but I personally was glad to see her explaining to a packed room the utility of meta-analysis, and comparing to the medical field in which meta-analysis is sort of the gold standard by which to draw conclusions.


    Following Jessica, Jason Fridley told us about the Evolutionary Imbalance Hypothesis (EIH) (see my notes here). He posed the problem of, when two biotas come together, what determines which species are retained in this new community and which species are left out. He listed a litany of traits/responses to measure to get at this problem, but suggested that with a little bit of "desktop ecology", we could simply ask: Is the invasibility of a region related to the phylogenetic diversity of that region? In three of four destination regions (Eastern Deciduous Forests, Mediterranean California, and the Czech Republic) there was a positive relationship between the proportion of invasive plant species from a source region and the phylogenetic diversity of that source region.

      +
      There is an awesome position opening up for an assistant professor in systematics at the University of Vermont. Below is the announcement, and see the original post at the Distributed Ecology blog. Why is this related to R? One can do a lot of systematics work in R, including retrieving scientific collections data through an upcoming package handshaking with VertNet (part of the rOpenSci project), managing large data sets, retrieval of GenBank data through the ape package (see fxn read.genbank), phylogenetic reconstruction and analysis, and more. So a systematist with R ninja skills will surely have a leg up on the rest of the field. 



      Assistant Professor in Systematics


      Department of Biology
      University of Vermont
      Burlington, Vermont

      The Department of Biology of the University of Vermont seeks applications for a tenure-track Assistant Professor position in Systematics and Evolutionary Biology of arthropods, especially insects. The position will be open in the fall of 2012. The successful candidate will have expertise in classical and molecular systematics, including analysis of complex data sets. Candidates pursuing phylogenomics and innovative methods in bioinformatics in combination with taxonomy are especially encouraged to apply. Department information at: http://www.uvm.edu/~biology/.


      All applicants are expected to: 1) hold a Ph.D. degree in relevant disciplines and have two or more years of postdoctoral experience; 2) develop a competitively funded research program; 3) teach undergraduate courses (chosen from among general biology, evolution, systematic entomology, and others in the candidate's expertise); 4) teach, mentor and advise undergraduate and graduate students; and 5) oversee a natural history collection of historic significance.

      Candidates must apply online: http://www.uvmjobs.com/. On left see "Search Postings" then find "Biology" under "HCM Department" then posting 0040090 (first posting). Sorry, but we cannot supply the direct link because it will time out.

      Attach a cover letter with a statement of research focus and teaching interests (one document), a curriculum vitae, representative publications, and the contact information of three references.

      Review of applications will begin on September 15, 2011, and will continue until the position is filled. Questions and up to three additional publications may be directed to Dr. Jos. J. Schall: jschall@uvm.edu.

      The University of Vermont recently identified several "Spires of Excellence" in which it will strategically focus institutional investments and growth over the next several years. One spire associated with the position is Complex Systems. Candidates whose research interests align with this spire are especially encouraged to apply: http://www.uvm.edu/~tri/.
      The University seeks faculty who can contribute to the diversity and excellence of the academic community through their research, teaching, and/or service. Applicants are requested to include in their cover letter information about how they will further this goal. The University of Vermont is an Affirmative Action/Equal Opportunity employer. The Department is committed to increasing faculty diversity and welcomes applications from women, underrepresented ethnic, racial and cultural groups, and from people with disabilities.

      - - Wednesday at #ESA11 + + Thursday at #ESA11

      - + -

      Interesting talks/posters:


      • Ethan White's poster describing EcologicalData.org was of course awesome given my interest in getting data into the hands of ecologists over at rOpenSci.org. Ethan also has software you can download on your machine to get the datasets you want easily - EcoData Retriever. [rOpenSci will try to take advantage of their work and allow you to call the retriever from R]
      • -
      • Carl Boettiger's talk was awesome. He explained how we need better tools to be able to predict collapses using early warning signals. He developed a way to estimate the statistical distribution of probabilities of system collapse. 
      • -
      • Jennifer Dunne: Explained how she put together an ancient network from Germany. Bravo. 
      • -
      • Carlos Melian explained his model of network buildup that starts from individuals, allows speciation, and other evolutionary processes. 
      • -
      • Rachel Winfree told us that in two sets of mutualistic plant-pollinator networks in New Jersey and California, the least connected pollinator species were the most likely to be lost from the network with increasing agricultural intensity. 
      • -
      • Dan Cariveau suggests that crop pollination services can be stabilized even with increasing agricultural intensity if in fact pollinator species respond in different ways. That is, some pollinators may decrease in abundance with increasing ag intensity, while other species may increase - retaining overall pollination services to crops.
      • -

        + Interesting talks/posters:

        Richard Lankau presented research on trade-offs and competitive ability. He suggests that during range expansion, selection for increased intraspecific competitive ability in older populations leads to loss of interspecific competitive ability, due to trade-offs between the two sets of traits.


        Ellner emphatically states that rapid evolution DOES matter for ecological responses, and longer-term evolutionary patterns as well. [His paper on the talk he was giving came out prior to his talk, which he pointed out, good form sir]


        Lauren Sullivan gave an interesting talk on bottom up and top down effects on plant reproduction in one site of a huge network of sites doing similar nutrient and herbivory manipulations around the globe - NutNet (go here: http://nutnet.science.oregonstate.edu/).


        Laura Prugh shows in California that the engineering effects (i.e., the mounds that they make) of giant kangaroo rats are more important for the associated food web than the species interaction effects (the proxy used was just density of rats).


        Kristy Deiner suggests that chironomids are more phylogenetically similar in lakes with stocked fish than in fishless lakes, based on high elevation lakes in the Sierra Nevada. She used barcode data to generate her phylogeny of chironomids. If you have barcode data and want to search the BOLD Systems site, one option is doing it from R using rbold, a package under development at rOpenSci (code at Github).


        Jessica Gurevitch presented a large working group's methods/approach to a set of reviews on invasion biology. We didn't get to see a lot of results from this work, but I personally was glad to see her explaining to a packed room the utility of meta-analysis, and comparing to the medical field in which meta-analysis is sort of the gold standard by which to draw conclusions.


        Following Jessica, Jason Fridley told us about the Evolutionary Imbalance Hypothesis (EIH) (see my notes here). He posed the problem of, when two biotas come together, what determines which species are retained in this new community and which species are left out. He listed a litany of traits/responses to measure to get at this problem, but suggested that with a little bit of "desktop ecology", we could simply ask: Is the invasibility of a region related to the phylogenetic diversity of that region? In three of four destination regions (Eastern Deciduous Forests, Mediterranean California, and the Czech Republic) there was a positive relationship between the proportion of invasive plant species from a source region and the phylogenetic diversity of that source region.
          diff --git a/_site/page44/index.html b/_site/page44/index.html index 210ac86502..31ee95d1ac 100644 --- a/_site/page44/index.html +++ b/_site/page44/index.html @@ -61,46 +61,52 @@

          Recology

          - - Monday at #ESA11 + + Wednesday at #ESA11

          - + -

          Monday was a good day at ESA in Austin. There were a few topics I promised to report on in my blogging/tweeting.


          ...focused on open source data. Carly Strasser's presentation on guidelines for data management was awesome (including other talks in the symposium on Creating Effective Data Management Plans for Ecological Research). Although this was a good session, I can't help but wish that they had hammered home the need for open science more. Oh well. Also, they talked a lot about how, and not a lot about why, we should properly curate data. Still, a good session. One issue Carly and I talked about was tracking code in versioning systems such as Github. There doesn't seem to be a culture of versioning code for analyses/simulations in ecology. But when we get there...it will be easier to share/track/collaborate on code.

          ...used R software. David Jennings talked about a meta-analysis asking if phylogenetic distance influences competition strength in pairwise experiments. David used the metafor package in R to do his meta-analysis. Good form sir.

          ...did cool science. Matt Helmus presented a great talk on phylogenetic species area curves (likely using R, or Matlab maybe?).


          p.s. We launched rOpenSci today.


          Oddities:

          • The Tilman effect - Tilman's talk was so packed it looked like there was a line waiting to get into a trendy bar. Here's a picture (credit: Jaime Ashander). Bigger room next time anyone? 
          • -
          • Wiley came out with an open access journal called Ecology and Evolution. This brings them to 3 open access journals (the other two are in other fields). We (rOpenSci) will attempt to hand-shake with these journals. 
          • -
          • The vegetarian lunch option was surprisingly good. Nice. 
          • -

          + Interesting talks/posters:

          • Ethan White's poster describing EcologicalData.org was of course awesome given my interest in getting data into the hands of ecologists over at rOpenSci.org. Ethan also has software you can download on your machine to get the datasets you want easily - EcoData Retriever. [rOpenSci will try to take advantage of their work and allow you to call the retriever from R]
          • +
          • Carl Boettiger's talk was awesome. He explained how we need better tools to be able to predict collapses using early warning signals. He developed a way to estimate the statistical distribution of probabilities of system collapse. 
          • +
          • Jennifer Dunne: Explained how she put together an ancient network from Germany. Bravo. 
          • +
          • Carlos Melian explained his model of network buildup that starts from individuals, allows speciation, and other evolutionary processes. 
          • +
          • Rachel Winfree told us that in two sets of mutualistic plant-pollinator networks in New Jersey and California, the least connected pollinator species were the most likely to be lost from the network with increasing agricultural intensity. 
          • +
          • Dan Cariveau suggests that crop pollination services can be stabilized even with increasing agricultural intensity if in fact pollinator species respond in different ways. That is, some pollinators may decrease in abundance with increasing ag intensity, while other species may increase - retaining overall pollination services to crops.
          • +

            - - (#ESA11) rOpenSci: a collaborative effort to develop R-based tools for facilitating Open Science + + Monday at #ESA11

            - + -

            Our development team would like to announce the launch of rOpenSci. As the title states, this project aims to create R packages to make open science more available to researchers.

            - - -
            http://ropensci.org/

            What this means is that we seek to connect researchers using R with as much open data as possible, mainly through APIs. There are a number of R packages that already do this (e.g., infochimps, twitteR), but we are making more packages, e.g., for Mendeley, PLoS Journals, and taxonomic sources (ITIS, EOL, TNRS, Phylomatic, uBio).

            Importantly, we are creating a package called rOpenSci, which aims to integrate functions from packages for individual open data sources.

            If you are somewhat interested, follow our progress on our website, on Twitter, or contact us. If you are really^2 interested you could go to Github and contribute. If  you are really^3 interested, join our development team.

            + Monday was a good day at ESA in Austin. There were a few topics I promised to report on in my blogging/tweeting.


            ...focused on open source data. Carly Strasser's presentation on guidelines for data management was awesome (including other talks in the symposium on Creating Effective Data Management Plans for Ecological Research). Although this was a good session, I can't help but wish that they had hammered home the need for open science more. Oh well. Also, they talked a lot about how, and not a lot about why, we should properly curate data. Still, a good session. One issue Carly and I talked about was tracking code in versioning systems such as Github. There doesn't seem to be a culture of versioning code for analyses/simulations in ecology. But when we get there...it will be easier to share/track/collaborate on code.

            ...used R software. David Jennings talked about a meta-analysis asking if phylogenetic distance influences competition strength in pairwise experiments. David used the metafor package in R to do his meta-analysis. Good form sir.

            ...did cool science. Matt Helmus presented a great talk on phylogenetic species area curves (likely using R, or Matlab maybe?).


            p.s. We launched rOpenSci today.


            Oddities:
            • The Tilman effect - Tilman's talk was so packed it looked like there was a line waiting to get into a trendy bar. Here's a picture (credit: Jaime Ashander). Bigger room next time anyone? 
            • +
            • Wiley came out with an open access journal called Ecology and Evolution. This brings them to 3 open access journals (the other two are in other fields). We (rOpenSci) will attempt to hand-shake with these journals. 
            • +
            • The vegetarian lunch option was surprisingly good. Nice. 
            • +

            - - Blogging/tweeting from #ESA11 + + (#ESA11) rOpenSci: a collaborative effort to develop R-based tools for facilitating Open Science

            - + -

            I will be blogging about the upcoming Ecological Society of America meeting in Austin, TX. I will focus on discussing talks/posters that:

            1. Have taken a cool approach to using data, or
            2. Have focused on open science/data, or
            3. Done something cool with R software, or
            4. Are just exciting in general

            I will also tweet throughout the meeting from @recology_ (yes the underscore is part of the name, recology was already taken). 

            The hashtag for the meeting this year is #ESA11

            + Our development team would like to announce the launch of rOpenSci. As the title states, this project aims to create R packages to make open science more available to researchers.

            + + +
            http://ropensci.org/

            What this means is that we seek to connect researchers using R with as much open data as possible, mainly through APIs. There are a number of R packages that already do this (e.g., infochimps, twitteR), but we are making more packages, e.g., for Mendeley, PLoS Journals, and taxonomic sources (ITIS, EOL, TNRS, Phylomatic, uBio).

            Importantly, we are creating a package called rOpenSci, which aims to integrate functions from packages for individual open data sources.

            If you are somewhat interested, follow our progress on our website, on Twitter, or contact us. If you are really^2 interested you could go to Github and contribute. If  you are really^3 interested, join our development team.
            diff --git a/_site/page45/index.html b/_site/page45/index.html index 1a0501e66c..495ce1cb92 100644 --- a/_site/page45/index.html +++ b/_site/page45/index.html @@ -61,40 +61,40 @@

            Recology

            - - Models in Evolutionary Ecology seminar, organized by Timothee Poisot + + Blogging/tweeting from #ESA11

            - + -

            Here is one of the talks, by Thomas Broquet, and see the rest here.



            Thomas Broquet by mez_en_video

            + I will be blogging about the upcoming Ecological Society of America meeting in Austin, TX. I will focus on discussing talks/posters that:

            1. Have taken a cool approach to using data, or
            2. Have focused on open science/data, or
            3. Done something cool with R software, or
            4. Are just exciting in general

            I will also tweet throughout the meeting from @recology_ (yes the underscore is part of the name, recology was already taken). 

            The hashtag for the meeting this year is #ESA11

            - - Archiving ecology/evolution data sets online + + Models in Evolutionary Ecology seminar, organized by Timothee Poisot

            - + -


            We now have many options for archiving data sets online:

            Dryad, KNB, Ecological Archives, Ecology Data Papers, Ecological Data, etc.

            However, these portals largely do not communicate with one another as far as I know, and there is no way to search over all data set sources, again, as far as I know. So, I wonder if it would ease finding all these different data sets if these different sites cloned their data sets on a site like Infochimps, or had links from Infochimps. Infochimps already has APIs (and there's an R wrapper for the Infochimps API already set up here: http://cran.r-project.org/web/packages/infochimps/index.html by Drew Conway), and they have discussions set up there, etc.

            Does it make sense to post data sets linked to published works on Infochimps? Probably not, now that I think about it. But perhaps it makes sense for other data sets, or subsets of data sets that are not linked with published works, to be posted there, as I know at least Dryad only accepts data sets linked with published papers.

            One use case: there was a tweet from someone recently saying that his students were excited about getting their data sets on their resume/CV, but didn't think there was any place to put them without the precondition that the data set be linked with a published work. Seems like this could be a good opportunity to place these data sets on Infochimps, where they would at least be available where a lot of people are searching for data sets, etc.

            What I think would be ideal is if Dryad, KNB, etc. could link their datasets to Infochimps, where they could be found, then users can either get them from Infochimps, or perhaps you would have to go to the Dryad site, e.g. But at least you could search over all ecological data sets then.

            Here is one of the talks, by Thomas Broquet, and see the rest here.



            Thomas Broquet by mez_en_video

            - - CRdata vs. Cloudnumbers + + Archiving ecology/evolution data sets online

            - + -

            Cloudnumbers and CRdata are two new cloud computing services.


            I tested the two services with a very simple script. The script simply creates a dataframe of 10000 numbers via rnorm, and assigns them to a factor of one of two levels (a or b). I then take the mean of the two factor levels with the aggregate function.


            In CRdata you need to put in some extra code to format the output in a browser window. For example, the last line below needs to have '<crdata_object>' tags on both sides of the output object so it can be rendered in a browser, and so on for other things that one would print to a console. You don't need this extra code when using Cloudnumbers.

             
            dat <- data.frame(n = rnorm(10000), p = rep(c('a','b'), each=5000))
             
            out <- aggregate(n ~ p, data = dat, mean)
             
            #<crdata_object>out</crdata_object>


            Here is a screenshot of the output from CRdata with the simple script above.

            This simple script ran in about 20 seconds or so from starting the job to finishing. However, it seems like the only output option is HTML. Can this be right? If so, that is an unfortunate limitation.


            In Cloudnumbers you have to start a workspace, upload your R code file.
            Then, start a session...
            choose your software platform...
            choose packages (one at a time, very slow)...
            then choose number of clusters, etc.
            Then finally start the job.
            Then it initializes, then finally you can open the console, and
            Then from here it is like running R as you normally would, except on the web.


            Who wins (at least for our very minimal example above)

            1. Speed of entire process (not just running code): CRdata
            2. Ease of use: CRdata
            3. Cost: CRdata (free only)
            4. Least annoying: Cloudnumbers (you don't have to add in extra code to run your own code)
            5. Opensource: CRdata (you can use publicly available code on the site)
            6. Long-term use: Cloudnumbers (more powerful, flexible, etc.)

            I imagine Cloudnumbers could be faster for larger jobs, but you would have to pay for the speed of course. 

            What I really want to see is a cloud computing service that accepts code directly run from R or RStudio. Hmmm...that would be so tasty indeed. I think Cloudnumbers may be able to do this, but haven't tested it yet.  

            Perhaps using the server version of RStudio along with Amazon's EC2 is a better option than both of these. See Karthik Ram's post about using RStudio server along with Amazon's EC2. Even just running RStudio server on your Ubuntu machine or virtual machine is a pretty cool option, even without EC2 (works like a charm on my Parallels Ubuntu vm on my Mac). 

            +
            We now have many options for archiving data sets online:

            Dryad, KNB, Ecological Archives, Ecology Data Papers, Ecological Data, etc.

            However, these portals largely do not communicate with one another as far as I know, and there is no way to search over all data set sources, again, as far as I know. So, I wonder if it would ease finding all these different data sets if these different sites cloned their data sets on a site like Infochimps, or had links from Infochimps. Infochimps already has APIs (and there's an R wrapper for the Infochimps API already set up here: http://cran.r-project.org/web/packages/infochimps/index.html by Drew Conway), and they have discussions set up there, etc.

            Does it make sense to post data sets linked to published works on Infochimps? Probably not, now that I think about it. But perhaps it makes sense for other data sets, or subsets of data sets that are not linked with published works, to be posted there, as I know at least Dryad only accepts data sets linked with published papers.

            One use case: there was a tweet from someone recently saying that his students were excited about getting their data sets on their resume/CV, but didn't think there was any place to put them without the precondition that the data set be linked with a published work. Seems like this could be a good opportunity to place these data sets on Infochimps, where they would at least be available where a lot of people are searching for data sets, etc.

            What I think would be ideal is if Dryad, KNB, etc. could link their datasets to Infochimps, where they could be found, then users can either get them from Infochimps, or perhaps you would have to go to the Dryad site, e.g. But at least you could search over all ecological data sets then.
            diff --git a/_site/page46/index.html b/_site/page46/index.html index 77d8fce31e..7daef93ebd 100644 --- a/_site/page46/index.html +++ b/_site/page46/index.html @@ -61,40 +61,40 @@

            Recology

            - - rbold: An R Interface for Bold Systems barcode repository + + CRdata vs. Cloudnumbers

            - + -

            Have you ever wanted to search and fetch barcode data from Bold Systems?

            I am developing functions to interface with Bold from R. I just started, but hopefully folks will find it useful.

            The code is at Github here. The two functions are still very buggy, so please bring up issues below, or in the Issues area on Github. For example, some searches work and other similar searches don't. Apologies in advance for the bugs.

            Below is a screenshot of an example query using function getsampleids to get barcode identifiers for specimens. You can then use getseqs function to grab barcode data for specific specimens or many specimens.
            Screen shot 2011-06-28 at 9.24.00 AM.png

            + Cloudnumbers and CRdata are two new cloud computing services.


            I tested the two services with a very simple script. The script simply creates a dataframe of 10000 numbers via rnorm, and assigns them to a factor of one of two levels (a or b). I then take the mean of the two factor levels with the aggregate function.


            In CRdata you need to put in some extra code to format the output in a browser window. For example, the last line below needs to have '<crdata_object>' tags on both sides of the output object so it can be rendered in a browser, and so on for other things that one would print to a console. You don't need this extra code when using Cloudnumbers.

             
            dat <- data.frame(n = rnorm(10000), p = rep(c('a','b'), each=5000))
             
            out <- aggregate(n ~ p, data = dat, mean)
             
            #<crdata_object>out</crdata_object>


            Here is a screenshot of the output from CRdata with the simple script above.

            This simple script ran in about 20 seconds or so from starting the job to finishing. However, it seems like the only output option is HTML. Can this be right? If so, that is an unfortunate limitation.


            In Cloudnumbers you have to start a workspace, upload your R code file.
            Then, start a session...
            choose your software platform...
            choose packages (one at a time, very slow)...
            then choose number of clusters, etc.
            Then finally start the job.
            Then it initializes, then finally you can open the console, and
            Then from here it is like running R as you normally would, except on the web.


            Who wins (at least for our very minimal example above)

            1. Speed of entire process (not just running code): CRdata
            2. Ease of use: CRdata
            3. Cost: CRdata (free only)
            4. Least annoying: Cloudnumbers (you don't have to add in extra code to run your own code)
            5. Opensource: CRdata (you can use publicly available code on the site)
            6. Long-term use: Cloudnumbers (more powerful, flexible, etc.)

            I imagine Cloudnumbers could be faster for larger jobs, but you would have to pay for the speed of course. 

            What I really want to see is a cloud computing service that accepts code directly run from R or RStudio. Hmmm...that would be so tasty indeed. I think Cloudnumbers may be able to do this, but haven't tested it yet.  

            Perhaps using the server version of RStudio along with Amazon's EC2 is a better option than both of these. See Karthik Ram's post about using RStudio server along with Amazon's EC2. Even just running RStudio server on your Ubuntu machine or virtual machine is a pretty cool option, even without EC2 (works like a charm on my Parallels Ubuntu vm on my Mac). 

            - - iEvoBio 2011 Synopsis + + rbold: An R Interface for Bold Systems barcode repository

            - + -

            We just wrapped up the 2011 iEvoBio meeting. It was awesome! If you didn't go this year or last year, definitely think about going next year.



            Here is a list of the cool projects that were discussed at the meeting (apologies if I left some out):
            1. Vistrails: workflow tool, awesome project by Claudio Silva
            2. Commplish: purpose is to use via API's, not with the web UI
            3. Phylopic: a database of life-form silhouettes, including an API for remote access, sweet!
            4. Gloome
            5. MappingLife: awesome geographic/etc data visualization interface on the web
            6. SuiteSMA: visualizing multiple alignments
            7. treeBASE: R interface to treebase, by Carl Boettiger
            8. VertNet: database for vertebrate natural history collections
            9. RevBayes: revamp of MrBayes, with GUI, etc. 
            10. Phenoscape Knowledge Base
              • Peter Midford lightning talk: talked about matching taxonomic and genetic data
            11. BiSciCol: biological science collections tracker
            12. Ontogrator 
            13. TNRS: taxonomic name resolution service
            14. Barcode of Life data systems, and remote access
            15. Moorea Biocode Project
            16. Microbial LTER's data
            17. BirdVis: interactive bird data visualization (Claudio Silva in collaboration with Cornell Lab of Ornithology)
            18. Crowdlabs: I think the site is down right now, another project by Claudio Silva
            19. Phycas: Bayesian phylogenetics, can you just call this from R?
            20. RIP MrBayes!!!! replaced by RevBayes (see 9 above)
            21. Slides of presentations will be at Slideshare (not all presentations up yet)          
            22. A birds of a feather group I was involved in proposed an idea (TOL-o-matic) like Phylomatic, but of broader scope, for easy access and submission of trees, and perhaps even social (think just pushing a 'SHARE' button within PAUP, RevBayes, or other phylogenetics software)! 
            23. Synopses of Birds of a Feather discussion groups: http://piratepad.net/iEvoBio11-BoF-reportouts

            + Have you ever wanted to search and fetch barcode data from Bold Systems?

            I am developing functions to interface with Bold from R. I just started, but hopefully folks will find it useful.

            The code is at Github here. The two functions are still very buggy, so please bring up issues below, or in the Issues area on Github. For example, some searches work and other similar searches don't. Apologies in advance for the bugs.

            Below is a screenshot of an example query using function getsampleids to get barcode identifiers for specimens. You can then use getseqs function to grab barcode data for specific specimens or many specimens.
            Screen shot 2011-06-28 at 9.24.00 AM.png

            - - PLoS journals API from R: "rplos" + + iEvoBio 2011 Synopsis

            - + -

            The Public Library of Science (PLoS) has an API so that developers can create cool tools to access their data (including full text papers!!).

            Carl Boettiger at UC Davis and I are working on R functions that use the PLoS API. See our code on Github here. See the wiki at the Github page for examples of use. We hope to deploy rplos as a package someday soon. Please feel free to suggest changes/additions to rplos in the comments below or on the Github/rplos site.

            Get your own API key here.
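
            As a hedged example of the kind of thing rplos aims to do (using the current function names, which may differ from what was available at the time of this post), a simple full-text search might look like:

            library(rplos)
            searchplos(q = "phylogeny", fl = c("id", "title"), limit = 5)  # first 5 matching articles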

            + We just wrapped up the 2011 iEvoBio meeting. It was awesome! If you didn't go this year or last year, definitely think about going next year.


            Here is a list of the cool projects that were discussed at the meeting (apologies if I left some out):
            1. Vistrails: workflow tool, awesome project by Claudio Silva
            2. Commplish: purpose is to use via API's, not with the web UI
            3. Phylopic: a database of life-form silhouettes, including an API for remote access, sweet!
            4. Gloome
            5. MappingLife: awesome geographic/etc data visualization interface on the web
            6. SuiteSMA: visualizing multiple alignments
            7. treeBASE: R interface to treebase, by Carl Boettiger
            8. VertNet: database for vertebrate natural history collections
            9. RevBayes: revamp of MrBayes, with GUI, etc. 
            10. Phenoscape Knowledge Base
              • Peter Midford lightning talk: talked about matching taxonomic and genetic data
            11. BiSciCol: biological science collections tracker
            12. Ontogrator 
            13. TNRS: taxonomic name resolution service
            14. Barcode of Life data systems, and remote access
            15. Moorea Biocode Project
            16. Microbial LTER's data
            17. BirdVis: interactive bird data visualization (Claudio Silva in collaboration with Cornell Lab of Ornithology)
            18. Crowdlabs: I think the site is down right now, another project by Claudio Silva
            19. Phycas: Bayesian phylogenetics, can you just call this from R?
            20. RIP MrBayes!!!! replaced by RevBayes (see 9 above)
            21. Slides of presentations will be at Slideshare (not all presentations up yet)          
            22. A birds of a feather group I was involved in proposed an idea (TOL-o-matic) like Phylomatic, but of broader scope, for easy access and submission of trees, and perhaps even social (think just pushing a 'SHARE' button within PAUP, RevBayes, or other phylogenetics software)! 
            23. Synopses of Birds of a Feather discussion groups: http://piratepad.net/iEvoBio11-BoF-reportouts
            diff --git a/_site/page47/index.html b/_site/page47/index.html index f10019261d..964fd3d852 100644 --- a/_site/page47/index.html +++ b/_site/page47/index.html @@ -61,40 +61,40 @@

            Recology

            - - ggplot2 talk by Hadley Wickham at Google + + PLoS journals API from R: "rplos"

            - + - + The Public Library of Science (PLoS) has an API so that developers can create cool tools to access their data (including full text papers!!).

            Carl Boettiger at UC Davis and I are working on R functions that use the PLoS API. See our code on Github here. See the wiki at the Github page for examples of use. We hope to deploy rplos as a package someday soon. Please feel free to suggest changes/additions to rplos in the comments below or on the Github/rplos site.

            Get your own API key here.

            - - OpenStates from R via API: watch your elected representatives + + ggplot2 talk by Hadley Wickham at Google

            - + -

            I am writing some functions to acquire data from the OpenStates project, via their API. They have a great support community at Google Groups as well.

            On its face this post is not obviously about ecology or evolution, but well, our elected representatives do, so to speak, hold our environment in a noose, ready to let the Earth hang any day.

            Code I am developing is over at Github.

            Here is an example of its use in R, in this case using the Bill Search option (billsearch.R on my Github site). Note that you do not provide your API key in the function call; instead, you put it in your .Rprofile file, which is sourced when you open R. We are searching here for the term 'agriculture' in Texas ('tx'), in the 'upper' chamber.
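
            A minimal sketch of that .Rprofile approach (the option name "openstates.key" here is just an illustration, not necessarily what the package expects):

            # in ~/.Rprofile
            options(openstates.key = "your-api-key-here")
             
            # then, inside a function, the key can be picked up without passing it each call
            key <- getOption("openstates.key")
            if (is.null(key)) stop("no OpenStates API key found; set options(openstates.key = ...)")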

            > temp <- billsearch('agriculture', state = 'tx', chamber = 'upper')
             
            > length(temp)
            [1] 21
             
            > temp[[1]]
            $title
            [1] "Congratulating John C. Padalino of El Paso for being appointed to the United States Department of Agriculture."
             
            $created_at
            [1] "2010-08-11 07:59:46"
             
            $updated_at
            [1] "2010-09-02 03:34:39"
             
            $chamber
            [1] "upper"
             
            $state
            [1] "tx"
             
            $session
            [1] "81"
             
            $type
            $type[[1]]
            [1] "resolution"
             
             
            $subjects
            $subjects[[1]]
            [1] "Resolutions"
             
            $subjects[[2]]
            [1] "Other"
             
             
            $bill_id
            [1] "SR 1042"
            Created by Pretty R at inside-R.org


            Apparently, the first bill (SR 1042, see $bill_id at the bottom of the list output) that came up was to congratulate John Padalino for being appointed to the USDA.

            The other function I have ready is getting basic metadata on a state, called statemetasearch.

            I plan to develop more functions for all the possible API calls to the OpenStates project.

            +

            - - Just turned on 'mobile template' for this blog, let me know if it works + + OpenStates from R via API: watch your elected representatives

            - + -
            + I am writing some functions to acquire data from the OpenStates project, via their API. They have a great support community at Google Groups as well.

            On its face this post is not obviously about ecology or evolution, but well, our elected representatives do, so to speak, hold our environment in a noose, ready to let the Earth hang any day.

            Code I am developing is over at Github.

            Here is an example of its use in R, in this case using the Bill Search option (billsearch.R on my Github site). Note that you do not provide your API key in the function call; instead, you put it in your .Rprofile file, which is sourced when you open R. We are searching here for the term 'agriculture' in Texas ('tx'), in the 'upper' chamber.

            > temp <- billsearch('agriculture', state = 'tx', chamber = 'upper')
             
            > length(temp)
            [1] 21
             
            > temp[[1]]
            $title
            [1] "Congratulating John C. Padalino of El Paso for being appointed to the United States Department of Agriculture."
             
            $created_at
            [1] "2010-08-11 07:59:46"
             
            $updated_at
            [1] "2010-09-02 03:34:39"
             
            $chamber
            [1] "upper"
             
            $state
            [1] "tx"
             
            $session
            [1] "81"
             
            $type
            $type[[1]]
            [1] "resolution"
             
             
            $subjects
            $subjects[[1]]
            [1] "Resolutions"
             
            $subjects[[2]]
            [1] "Other"
             
             
            $bill_id
            [1] "SR 1042"
            Created by Pretty R at inside-R.org


            Apparently, the first bill (SR 1042, see $bill_id at the bottom of the list output) that came up was to congratulate John Padalino for being appointed to the USDA.

            The other function I have ready is getting basic metadata on a state, called statemetasearch.

            I plan to develop more functions for all the possible API calls to the OpenStates project.
            diff --git a/_site/page48/index.html b/_site/page48/index.html index b9ddfb04a6..a02e2493d3 100644 --- a/_site/page48/index.html +++ b/_site/page48/index.html @@ -61,40 +61,40 @@

            Recology

            - - > 10,000 visits to Recology!!!! + + Just turned on 'mobile template' for this blog, let me know if it works

            -

            Thanks so much everyone for stopping by!

            +

            - - How to fit power laws + + > 10,000 visits to Recology!!!!

            - + -

            A new paper out in Ecology by Xiao and colleagues (in press, here) compares the use of log-transformation to non-linear regression for analyzing power-laws.

            They suggest that the error distribution should determine which method performs better. When your errors are additive, homoscedastic, and normally distributed, they propose using non-linear regression. When errors are multiplicative, heteroscedastic, and lognormally distributed, they suggest using linear regression on log-transformed data. The assumptions of these two methods are different, so both cannot be correct for a single dataset.

            They will provide their R code for their methods once they are up on Ecological Archives (they weren't up there by the time of this post).
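
            In the meantime, here is a rough, hedged sketch (not the authors' code) of what the two approaches look like in R for a power law y = a * x^b:

            set.seed(1)
            x <- runif(100, 1, 100)
            a_true <- 2; b_true <- 0.75
             
            # multiplicative, lognormal error -> linear regression on the log-log scale
            y_mult <- a_true * x^b_true * exp(rnorm(100, sd = 0.3))
            coef(lm(log(y_mult) ~ log(x)))  # intercept estimates log(a), slope estimates b
             
            # additive, normal error -> non-linear least squares on the original scale
            y_add <- a_true * x^b_true + rnorm(100, sd = 2)
            coef(nls(y_add ~ a * x^b, start = list(a = 1, b = 1)))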

            + Thanks so much everyone for stopping by!

            - - searching ITIS and fetching Phylomatic trees + + How to fit power laws

            - + -

            I am writing a set of functions to search ITIS for taxonomic information (more databases to come) and functions to fetch plant phylogenetic trees from Phylomatic. Code at github.

            Also, see the examples in the demos folder on the Github site above.









            + A new paper out in Ecology by Xiao and colleagues (in press, here) compares the use of log-transformation to non-linear regression for analyzing power-laws.

            They suggest that the error distribution should determine which method performs better. When your errors are additive, homoscedastic, and normally distributed, they propose using non-linear regression. When errors are multiplicative, heteroscedastic, and lognormally distributed, they suggest using linear regression on log-transformed data. The assumptions of these two methods are different, so both cannot be correct for a single dataset.

            They will provide their R code for their methods once they are up on Ecological Archives (they weren't up there by the time of this post).
            diff --git a/_site/page49/index.html b/_site/page49/index.html index ca5dfbb0f4..1a48d072e1 100644 --- a/_site/page49/index.html +++ b/_site/page49/index.html @@ -59,6 +59,19 @@

            Recology

              +
            +

            + + searching ITIS and fetching Phylomatic trees + +

            + + + + I am writing a set of functions to search ITIS for taxonomic information (more databases to come) and functions to fetch plant phylogenetic trees from Phylomatic. Code at github.

            Also, see the examples in the demos folder on the Github site above.









            + +
            +

            @@ -68,27 +81,26 @@

            -

            I did a little simulation to examine how K and lambda vary in response to tree size (and how they compare to each other on the same simulated trees). I use Liam Revell's functions fastBM to generate traits, and phylosig to measure phylogenetic signal.
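
            For reference, a minimal sketch of that kind of simulation using Liam Revell's phytools package (not my exact simulation code; the tree size here is arbitrary):

            library(ape)
            library(phytools)
             
            tree <- pbtree(n = 50)                    # simulate a pure-birth tree
            trait <- fastBM(tree)                     # Brownian motion trait on that tree
            phylosig(tree, trait, method = "K")       # Blomberg's K
            phylosig(tree, trait, method = "lambda")  # Pagel's lambda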

            + I did a little simulation to examine how K and lambda vary in response to tree size (and how they compare to each other on the same simulated trees). I use Liam Revell's functions fastBM to generate traits, and phylosig to measure phylogenetic signal. + +

            -



            +Two observations: -

            Two observations:

            ++ First, it seems that lambda is more sensitive than K to tree size, but then lambda levels out at about 40 species, whereas K continues to vary around a mean of 1. ++ Second, K is more variable than lambda at all levels of tree size (compare standard error bars). -
              -
            • First, it seems that lambda is more sensitive than K to tree size, but then lambda levels out at about 40 species, whereas K continues to vary around a mean of 1.
            • -
            • Second, K is more variable than lambda at all levels of tree size (compare standard error bars).
            • -
            +

            -



            +Does this make sense to those smart folks out there? +

            -

            Does this make sense to those smart folks out there? -

            +
            -


            +

            -



            -

            +

            @@ -101,148 +113,7 @@

            -

            UPDATE: Greg Jordan has a much more elegant way of plotting trees with ggplot2. See his links in the comments below.


            I wrote a simple function for plotting a phylogeny in ggplot2. However, it only handles a 3 species tree right now, as I haven't figured out how to generalize the approach to N species.

            Any ideas on how to improve this?



            - -

            - -
            -

            - - plyr's idata.frame VS. data.frame - -

            - - - -
            - -

            I had seen the function idata.frame in plyr before, but not really tested it. From the plyr documentation:

            - -
            -

            "An immutable data frame works like an ordinary data frame, except that when you subset it, it returns a reference to the original data frame, not a a copy. This makes subsetting substantially faster and has a big impact when you are working with large datasets with many groups."

            -
            - -

            For example, although baseball is a data.frame, its immutable counterpart is a reference to it:

            - -
            > idata.frame(baseball)
            -<environment: 0x1022c74e8>
            -attr(,"class")
            -[1] "idf"         "environment"
            - -

            Here are a few comparisons of operations on normal data frames and immutable data frames. Immutable data frames don't work with the doBy package, but do work with aggregate in base functions.  Overall, the speed gains using idata.frame are quite impressive - I will use it more often for sure.

            - -

            Here's the comparisons of idata.frames and data.frames:

            - -
            - -
            # load packages
            -require(plyr)
            -require(reshape2)
            -
            -# Make immutable data frame
            -baseball_i <- idata.frame(baseball)
            - -

            Example 1 - idata.frame more than twice as fast

            - -
            system.time(replicate(50, ddply(baseball, "year", summarise, mean(rbi))))
            - -
               user  system elapsed 
            -  8.509   0.266   8.798 
            - -
            system.time(replicate(50, ddply(baseball_i, "year", summarise, mean(rbi))))
            - -
               user  system elapsed 
            -  7.233   0.025   7.334 
            - -

            Example 2 - Bummer, this does not work with idata.frame's

            - -
            colwise(max, is.numeric)(baseball)  # works
            - -
              year stint   g  ab   r   h X2b X3b hr rbi sb cs  bb so ibb hbp sh sf
            -1 2007     4 165 705 177 257  64  28 73  NA NA NA 232 NA  NA  NA NA NA
            -  gidp
            -1   NA
            - -
            colwise(max, is.numeric)(baseball_i)  # doesn't work
            - -
            Error: is.data.frame(df) is not TRUE
            - -

            Example 3 - idata.frame twice as fast

            - -
            system.time(replicate(100, baseball[baseball$year == "1884", ]))
            - -
               user  system elapsed 
            -  1.329   0.035   1.378 
            - -
            system.time(replicate(100, baseball_i[baseball_i$year == "1884", ]))
            - -
               user  system elapsed 
            -  0.674   0.015   0.689 
            - -

            Example 4 - idata.frame faster

            - -
            system.time(replicate(50, melt(baseball[, 1:4], id = 1)))
            - -
               user  system elapsed 
            -  7.129   0.506   7.691 
            - -
            system.time(replicate(50, melt(baseball_i[, 1:4], id = 1)))
            - -
               user  system elapsed 
            -  0.852   0.162   1.015 
            - -

            And you can go back to a data frame by

            - -
            d <- as.data.frame(baseball_i)
            -str(d)
            - -
            'data.frame': 21699 obs. of  22 variables:
            - $ id   : chr  "ansonca01" "forceda01" "mathebo01" "startjo01" ...
            - $ year : int  1871 1871 1871 1871 1871 1871 1871 1872 1872 1872 ...
            - $ stint: int  1 1 1 1 1 1 1 1 1 1 ...
            - $ team : chr  "RC1" "WS3" "FW1" "NY2" ...
            - $ lg   : chr  "" "" "" "" ...
            - $ g    : int  25 32 19 33 29 29 29 46 37 25 ...
            - $ ab   : int  120 162 89 161 128 146 145 217 174 130 ...
            - $ r    : int  29 45 15 35 35 40 36 60 26 40 ...
            - $ h    : int  39 45 24 58 45 47 37 90 46 53 ...
            - $ X2b  : int  11 9 3 5 3 6 5 10 3 11 ...
            - $ X3b  : int  3 4 1 1 7 5 7 7 0 0 ...
            - $ hr   : int  0 0 0 1 3 1 2 0 0 0 ...
            - $ rbi  : int  16 29 10 34 23 21 23 50 15 16 ...
            - $ sb   : int  6 8 2 4 3 2 2 6 0 2 ...
            - $ cs   : int  2 0 1 2 1 2 2 6 1 2 ...
            - $ bb   : int  2 4 2 3 1 4 9 16 1 1 ...
            - $ so   : int  1 0 0 0 0 1 1 3 1 0 ...
            - $ ibb  : int  NA NA NA NA NA NA NA NA NA NA ...
            - $ hbp  : int  NA NA NA NA NA NA NA NA NA NA ...
            - $ sh   : int  NA NA NA NA NA NA NA NA NA NA ...
            - $ sf   : int  NA NA NA NA NA NA NA NA NA NA ...
            - $ gidp : int  NA NA NA NA NA NA NA NA NA NA ...
            - -

            idata.frame doesn't work with the doBy package

            - -
            require(doBy)
            -summaryBy(rbi ~ year, baseball_i, FUN = c(mean), na.rm = T)
            - -
            Error: cannot coerce type 'environment' to vector of type 'any'
            - -

            But idata.frame works with aggregate in base (but with minimal speed gains) and aggregate is faster than ddply

            - -
            system.time(replicate(100, aggregate(rbi ~ year, baseball, mean)))
            - -
               user  system elapsed 
            -  4.998   0.346   5.373 
            - -
            system.time(replicate(100, aggregate(rbi ~ year, baseball_i, mean)))
            - -
               user  system elapsed 
            -  4.745   0.283   5.045 
            - -
            system.time(replicate(100, ddply(baseball_i, "year", summarise, mean(rbi))))
            - -
               user  system elapsed 
            - 13.293   0.042  13.428 
            + UPDATE: Greg Jordan has a much more elegant way of plotting trees with ggplot2. See his links in the comments below.


            I wrote a simple function for plotting a phylogeny in ggplot2. However, it only handles a 3 species tree right now, as I haven't figured out how to generalize the approach to N species.

            Any ideas on how to improve this?
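
            One possible direction (a sketch under my own assumptions, not the original function): compute rectangular-layout coordinates from an ape "phylo" object and draw the segments with ggplot2, which should handle any number of tips. It assumes ape's standard cladewise node numbering, where child nodes are numbered after their parents.

            library(ape)
            library(ggplot2)
             
            ggphylo <- function(tree) {
              ntip <- length(tree$tip.label)
              nnode <- tree$Nnode
              x <- node.depth.edgelength(tree)            # distance from the root to each node
              y <- rep(NA_real_, ntip + nnode)
              y[seq_len(ntip)] <- seq_len(ntip)           # tips evenly spaced
              for (node in (ntip + nnode):(ntip + 1)) {   # children before parents (cladewise numbering)
                kids <- tree$edge[tree$edge[, 1] == node, 2]
                y[node] <- mean(y[kids])
              }
              parent <- tree$edge[, 1]; child <- tree$edge[, 2]
              segs <- rbind(
                data.frame(x = x[parent], xend = x[child],  y = y[child],  yend = y[child]),  # horizontal branches
                data.frame(x = x[parent], xend = x[parent], y = y[parent], yend = y[child])   # vertical connectors
              )
              tips <- data.frame(x = x[seq_len(ntip)], y = y[seq_len(ntip)], label = tree$tip.label)
              ggplot() +
                geom_segment(data = segs, aes(x = x, y = y, xend = xend, yend = yend)) +
                geom_text(data = tips, aes(x = x, y = y, label = label), hjust = -0.1) +
                theme_void()
            }
             
            ggphylo(rtree(10))  # works for trees of any size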



            diff --git a/_site/page5/index.html b/_site/page5/index.html index c2f7fb0018..a75a6ccebe 100644 --- a/_site/page5/index.html +++ b/_site/page5/index.html @@ -61,596 +61,1060 @@

            Recology

            - - geojsonio - a new package to do geojson things + + lawn - a new package to do geospatial analysis

            - + 

            + `lawn` is an R wrapper for the Javascript library [turf.js](http://turfjs.org/) for advanced geospatial analysis. In addition, we have a few functions to interface with the [geojson-random](https://github.com/mapbox/geojson-random) Javascript library.

            `lawn` includes traditional spatial operations, helper functions for creating GeoJSON data, and data classification and statistics tools.

            There is an additional helper function (see `view()`) in this package to help visualize data with interactive maps via the `leaflet` package ([https://github.com/rstudio/leaflet](https://github.com/rstudio/leaflet)). Note that `leaflet` is not required to install `lawn` - it's in Suggests, not Imports or Depends.

            Use cases for this package include (but not limited to, obs.) the following (all below assumes GeoJSON format):

            * Create random spatial data.
            * Convert among spatial data types (e.g. `Polygon` to `FeatureCollection`)
            * Transform objects, including merging many, simplifying, calculating hulls, etc.
            * Measuring objects
            * Performing interpolation of objects
            * Aggregating data (aka properties) associated with objects

            ## Install

            Stable `lawn` version from CRAN - this should fetch `leaflet`, which is not on CRAN, but in a `drat` repo (let me know if it doesn't)

            ```r
            install.packages("lawn")
            ```

            Or, the development version from Github

            ```r
            devtools::install_github("ropensci/lawn")
            ```

            ```r
            library("lawn")
            ```

            ## view

            `lawn` includes a tiny helper function for visualizing geojson. For examples below, we'll make liberal use of the `lawn::view()` function to visualize what the heck it is we're doing. mkay, let's roll...

            We've tried to make `view()` work with as many inputs as possible, from class `character` containing json to the class `json` from the `jsonlite` package, to the class `list` to all of the GeoJSON outputs from functions in `lawn`.

            ```r
            view(lawn_data$points_average)
            ```

            ![map1](/public/img/2015-05-18-mow-the-lawn/map1.png)

            Here, we sample at random two points from the same dataset just viewed.

            ```r
            lawn_sample(lawn_data$points_average, 2) %>% view()
            ```

            ![map2](/public/img/2015-05-18-mow-the-lawn/map2.png)

            ## Make some geojson data

            Point

            ```r
            lawn_point(c(-74.5, 40))
            #> $type
            #> [1] "Feature"
            #> 
            #> $geometry
            #> $geometry$type
            #> [1] "Point"
            #> 
            #> $geometry$coordinates
            #> [1] -74.5 40.0
            #> 
            #> 
            #> $properties
            #> named list()
            #> 
            #> attr(,"class")
            #> [1] "point"
            ```

            geojsonio converts geographic data to GeoJSON and TopoJSON formats - though the focus is mostly on GeoJSON

            - -

            For those not familiar with GeoJSON, it is a format for encoding a variety of geographic data structures. GeoJSON supports the following geometry types: Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection. These geometry types are also found in well-known text (WKT), and have equivalents in R's spatial classes. Read the spec for more detailed information.

            - -

            Other great geojson resources:

            - - - -

            Functions in this package are organized first around what you're working with or want to get, geojson or topojson, then convert to or read from various formats:

            - -
              -
            • geojson_list() - convert to GeoJSON as R list format
            • -
            • geojson_json() - convert to GeoJSON as json
            • -
            • geojson_read()/topojson_read() - read a GeoJSON/TopoJSON file from file path or URL
            • -
            • geojson_write() - write a GeoJSON file locally (no write TopoJSON yet)
            • -
            - -

            Each of the above functions has methods for various objects/classes, including numeric, data.frame, list, SpatialPolygons, SpatialLines, SpatialPoints, etc. (including the classes in rgeos)

            - -

            Use cases for this package include (but are not limited to, obs.) the following:

            - -
              -
            • Get data in GeoJSON json format, and you want to get it into a list in R.
            • -
            • Get data into GeoJSON format to use downstream to make an interactive map - -
                -
              • in R (e.g., with leaflet)
              • -
              • or in another context (e.g., using javascript with mapbox/leaflet)
              • -
            • -
            • Data is in a data.frame/matrix/list and you want to make GeoJSON format data.
            • -
            • Data is in one of the many spatial classes (e.g., SpatialPoints) and you want GeoJSON
            • -
            • You need to add styling to your data - can do with this package for certain data types.
            • -
            • You want to check that your GeoJSON data is valid - two ways to do it in geojsonio.
            • -
            • Combine objects together (e.g., a point and a line), either from two geo_list objects, or two json objects. See ?geojson-add
            • -
            - -

            Install

            - -

            See the github repo for notes about dependencies https://github.com/ropensci/geojsonio#install.

            - -

            CRAN version or the dev version from GitHub

            -
            install.packages("geojsonio")
            -devtools::install_github("sckott/geojsonio")
            -
            library("geojsonio")
            -
            -

            GeoJSON

            - -

            Convert various formats to geojson

            - -

            From a numeric vector of length 2

            - -

            as json

            -
            geojson_json(c(32.45, -99.74))
            -#> {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[32.45,-99.74]},"properties":{}}]}
            -
            -

            as a list

            -
            geojson_list(c(32.45, -99.74))
            -#> $type
            -#> [1] "FeatureCollection"
            -#> 
            -#> $features
            -#> $features[[1]]
            -#> $features[[1]]$type
            -#> [1] "Feature"
            -#> 
            -#> $features[[1]]$geometry
            -#> $features[[1]]$geometry$type
            -...
            -
            -

            From a data.frame

            - -

            as json

            -
            geojson_json(us_cities[1:2, ], lat = 'lat', lon = 'long')
            -#> {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[-99.74,32.45]},"properties":{"name":"Abilene TX","country.etc":"TX","pop":"113888","capital":"0"}},{"type":"Feature","geometry":{"type":"Point","coordinates":[-81.52,41.08]},"properties":{"name":"Akron OH","country.etc":"OH","pop":"206634","capital":"0"}}]}
            -
            -

            as a list

            -
            geojson_list(us_cities[1:2, ], lat = 'lat', lon = 'long')
            -#> $type
            -#> [1] "FeatureCollection"
            -#> 
            -#> $features
            -#> $features[[1]]
            -#> $features[[1]]$type
            -#> [1] "Feature"
            -#> 
            -#> $features[[1]]$geometry
            -#> $features[[1]]$geometry$type
            -...
            -
            -

            From SpatialPolygons class

            -
            library('sp')
            -poly1 <- Polygons(list(Polygon(cbind(c(-100, -90, -85, -100),
            -  c(40, 50, 45, 40)))), "1")
            -poly2 <- Polygons(list(Polygon(cbind(c(-90, -80, -75, -90),
            -  c(30, 40, 35, 30)))), "2")
            -(sp_poly <- SpatialPolygons(list(poly1, poly2), 1:2))
            -#> An object of class "SpatialPolygons"
            -#> Slot "polygons":
            -#> [[1]]
            -#> An object of class "Polygons"
            -#> Slot "Polygons":
            -#> [[1]]
            -#> An object of class "Polygon"
            -#> Slot "labpt":
            -#> [1] -91.66667  45.00000
            -#> 
            -...
            -
            -

            to json

            -
            geojson_json(sp_poly)
            -#> {"type":"FeatureCollection","features":[{"type":"Feature","id":1,"properties":{"dummy":0},"geometry":{"type":"Polygon","coordinates":[[[-100,40],[-90,50],[-85,45],[-100,40]]]}},{"type":"Feature","id":2,"properties":{"dummy":0},"geometry":{"type":"Polygon","coordinates":[[[-90,30],[-80,40],[-75,35],[-90,30]]]}}]}
            -
            -

            to a list

            -
            geojson_list(sp_poly)
            -#> $type
            -#> [1] "FeatureCollection"
            -#> 
            -#> $features
            -#> $features[[1]]
            -#> $features[[1]]$type
            -#> [1] "Feature"
            -#> 
            -#> $features[[1]]$id
            -#> [1] 1
            -...
            -
            -

            From SpatialPoints class

            -
            x <- c(1, 2, 3, 4, 5)
            -y <- c(3, 2, 5, 1, 4)
            -(s <- SpatialPoints(cbind(x, y)))
            -#> SpatialPoints:
            -#>      x y
            -#> [1,] 1 3
            -#> [2,] 2 2
            -#> [3,] 3 5
            -#> [4,] 4 1
            -#> [5,] 5 4
            -#> Coordinate Reference System (CRS) arguments: NA
            -
            -

            to json

            -
            geojson_json(s)
            -#> {"type":"FeatureCollection","features":[{"type":"Feature","id":1,"properties":{"dat":1},"geometry":{"type":"Point","coordinates":[1,3]}},{"type":"Feature","id":2,"properties":{"dat":2},"geometry":{"type":"Point","coordinates":[2,2]}},{"type":"Feature","id":3,"properties":{"dat":3},"geometry":{"type":"Point","coordinates":[3,5]}},{"type":"Feature","id":4,"properties":{"dat":4},"geometry":{"type":"Point","coordinates":[4,1]}},{"type":"Feature","id":5,"properties":{"dat":5},"geometry":{"type":"Point","coordinates":[5,4]}}]}
            -
            -

            to a list

            -
            geojson_list(s)
            -#> $type
            -#> [1] "FeatureCollection"
            -#> 
            -#> $features
            -#> $features[[1]]
            -#> $features[[1]]$type
            -#> [1] "Feature"
            -#> 
            -#> $features[[1]]$id
            -#> [1] 1
            -...
            -
            -

            Combine objects

            - -

            geo_list + geo_list

            - -
            -

            Note: geo_list is the output type from geojson_list() - it's just a list with a class attached so we know it's geojson :)

            -
            -
            vec <- c(-99.74, 32.45)
            -a <- geojson_list(vec)
            -vecs <- list(c(100.0, 0.0), c(101.0, 0.0), c(100.0, 0.0))
            -b <- geojson_list(vecs, geometry = "polygon")
            -a + b
            -#> $type
            -#> [1] "FeatureCollection"
            -#> 
            -#> $features
            -#> $features[[1]]
            -#> $features[[1]]$type
            -#> [1] "Feature"
            -#> 
            -#> $features[[1]]$geometry
            -#> $features[[1]]$geometry$type
            -...
            -
            -

            json + json

            -
            c <- geojson_json(c(-99.74, 32.45))
            -vecs <- list(c(100.0, 0.0), c(101.0, 0.0), c(101.0, 1.0), c(100.0, 1.0), c(100.0, 0.0))
            -d <- geojson_json(vecs, geometry = "polygon")
            -c + d
            -#> {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[-99.74,32.45]},"properties":{}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[100,0],[101,0],[101,1],[100,1],[100,0]]]},"properties":[]}]}
            -
            -

            Write geojson

            -
            geojson_write(us_cities[1:2, ], lat = 'lat', lon = 'long')
            -#> <geojson>
            -#>   Path:       myfile.geojson
            -#>   From class: data.frame
            -
            -

            Topojson

            - -

            In the current version of this package you can read topojson. Writing topojson was in this package, but is gone for now - it will come back later as an interface to topojson via V8.

            - -

            Read from a file

            -
            file <- system.file("examples", "us_states.topojson", package = "geojsonio")
            -out <- topojson_read(file)
            -
            -

            Read from a URL

            -
            url <- "https://raw.githubusercontent.com/shawnbot/d3-cartogram/master/data/us-states.topojson"
            -out <- topojson_read(url)
            -
            -

            Lint geojson

            - -

            There are two ways to do this in this package.

            - -

            lint, locally

            - -

            Uses the javascript library geojsonhint from Mapbox. We're running this locally via the V8 package.

            - -

            Good

            -
            lint('{"type": "Point", "coordinates": [-100, 80]}')
            -#> [1] "valid"
            -
            -

            Bad

            -
            lint('{"type": "Rhombus", "coordinates": [[1, 2], [3, 4], [5, 6]]}')
            -#> $message
            -#> [1] "The type Rhombus is unknown"
            -#> 
            -#> $line
            -#> [1] 1
            -
            -

            validate, with a web service

            - -

            Uses the web service at http://geojsonlint.com/

            - -

            Good

            -
            validate('{"type": "Point", "coordinates": [-100, 80]}')
            -#> $status
            -#> [1] "ok"
            -
            -

            Bad

            -
            validate('{"type": "Rhombus", "coordinates": [[1, 2], [3, 4], [5, 6]]}')
            -#> $message
            -#> [1] "\"Rhombus\" is not a valid GeoJSON type."
            -#> 
            -#> $status
            -#> [1] "error"
            -
            -

            To do

            - -
              -
            • I'd like to replace rgdal with javascript libraries to read from various file types (kml, shp, etc.) and convert to geojson. This is in development, and will most likely come in the next version of this package. This should make installation a bit easier as we won't have to depend on rgdal and GDAL
            • -
            • Performance improvements. Some operations already use the gdal or geos C libraries, so are quite fast, though the round trip to disk and back does take significant time. I'd like to speed this up.
            • -
            • More input types. We already have operations (json, list, etc.) for lots of input types (data.frame, list, sp classes), but likely there will be more added.
            • -
            • Most likely add functions topojson_list(), topojson_json()
            • -
            +```r +lawn_point(c(-74.5, 40)) %>% view +``` + +![point](/public/img/2015-05-18-mow-the-lawn/point.png) + +Polygon + + +```r +rings <- list(list( + c(-2.275543, 53.464547), + c(-2.275543, 53.489271), + c(-2.215118, 53.489271), + c(-2.215118, 53.464547), + c(-2.275543, 53.464547) +)) +lawn_polygon(rings) +#> $type +#> [1] "Feature" +#> +#> $geometry +#> $geometry$type +#> [1] "Polygon" +#> +#> $geometry$coordinates +#> , , 1 +#> +#> [,1] [,2] [,3] [,4] [,5] +#> [1,] -2.275543 -2.275543 -2.215118 -2.215118 -2.275543 +#> +#> , , 2 +#> +#> [,1] [,2] [,3] [,4] [,5] +#> [1,] 53.46455 53.48927 53.48927 53.46455 53.46455 +#> +#> +#> +#> $properties +#> named list() +#> +#> attr(,"class") +#> [1] "polygon" +``` + + +```r +lawn_polygon(rings) %>% view +``` + +![polygon](/public/img/2015-05-18-mow-the-lawn/polygon.png) + +Random set of points + + +```r +lawn_random(n = 2) +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> type geometry.type geometry.coordinates +#> 1 Feature Point -137.46327, -63.46154 +#> 2 Feature Point -110.68426, 83.10533 +#> +#> attr(,"class") +#> [1] "featurecollection" +``` + + +```r +lawn_random(n = 5) %>% view +``` + +![rand1](/public/img/2015-05-18-mow-the-lawn/lawn_random.png) + +Or, use a different Javascript library ([geojson-random](https://github.com/mapbox/geojson-random)) to create random features. + +Positions + + +```r +gr_position() +#> [1] -179.77996 45.99018 +``` + +Points + + +```r +gr_point(2) +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> type geometry.type geometry.coordinates +#> 1 Feature Point 5.83895, -27.77218 +#> 2 Feature Point 78.50177, 14.95840 +#> +#> attr(,"class") +#> [1] "featurecollection" +``` + + +```r +gr_point(2) %>% view +``` + +![rand2](/public/img/2015-05-18-mow-the-lawn/geojsonrandom_points.png) + +Polygons + + +```r +gr_polygon(n = 1, vertices = 5, max_radial_length = 5) +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> type geometry.type +#> 1 Feature Polygon +#> geometry.coordinates +#> 1 67.58827, 67.68551, 67.00091, 66.70156, 65.72578, 67.58827, -42.11340, -42.69850, -43.54866, -42.42758, -41.76731, -42.11340 +#> +#> attr(,"class") +#> [1] "featurecollection" +``` + + +```r +gr_polygon(n = 1, vertices = 5, max_radial_length = 5) %>% view +``` + +![rand3](/public/img/2015-05-18-mow-the-lawn/geojsonrandom_polygons.png) + +## count + +Count number of points within polygons, appends a new field to properties (see the `count` field) + + +```r +lawn_count(polygons = lawn_data$polygons_count, points = lawn_data$points_count) +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> type pt_count geometry.type +#> 1 Feature 2 Polygon +#> 2 Feature 0 Polygon +#> geometry.coordinates +#> 1 -112.07239, -112.07239, -112.02810, -112.02810, -112.07239, 46.58659, 46.61761, 46.61761, 46.58659, 46.58659 +#> 2 -112.02398, -112.02398, -111.96613, -111.96613, -112.02398, 46.57043, 46.61502, 46.61502, 46.57043, 46.57043 +#> +#> attr(,"class") +#> [1] "featurecollection" +``` + +## distance + +Define two points + + +```r +from <- '{ + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Point", + "coordinates": [-75.343, 39.984] + } +}' +to <- '{ + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Point", + "coordinates": [-75.534, 39.123] + } +}' +``` + +Calculate distance, default units is kilometers (default output: `km`) + + +```r +lawn_distance(from, to) +#> [1] 97.15958 +``` + +## sample from a FeatureCollection + + +```r +dat <- lawn_data$points_average +cat(dat) +#> { 
+#> "type": "FeatureCollection", +#> "features": [ +#> { +#> "type": "Feature", +#> "properties": { +#> "population": 200 +#> }, +#> "geometry": { +#> "type": "Point", +... +``` + +Sample 2 points at random + + +```r +lawn_sample(dat, 2) +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> type population geometry.type geometry.coordinates +#> 1 Feature 200 Point 10.80643, 59.90891 +#> 2 Feature 600 Point 10.71579, 59.90478 +#> +#> attr(,"class") +#> [1] "featurecollection" +``` + +## extent + +Calculates the extent of all input features in a FeatureCollection, and returns a bounding box. + + +```r +lawn_extent(lawn_data$points_average) +#> [1] 10.71579 59.90478 10.80643 59.93162 +``` + +## buffer + +Calculates a buffer for input features for a given radius. + + +```r +dat <- '{ + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Polygon", + "coordinates": [[ + [-112.072391,46.586591], + [-112.072391,46.61761], + [-112.028102,46.61761], + [-112.028102,46.586591], + [-112.072391,46.586591] + ]] + } +}' +view(dat) +``` + +![buffer1](/public/img/2015-05-18-mow-the-lawn/buffer1.png) + + +```r +lawn_buffer(dat, 1, "miles") %>% view +``` + +![buffer2](/public/img/2015-05-18-mow-the-lawn/buffer2.png) + +## Union polygons together + + +```r +poly1 <- '{ + "type": "Feature", + "properties": { + "fill": "#0f0" + }, + "geometry": { + "type": "Polygon", + "coordinates": [[ + [-122.801742, 45.48565], + [-122.801742, 45.60491], + [-122.584762, 45.60491], + [-122.584762, 45.48565], + [-122.801742, 45.48565] + ]] + } +}' + +poly2 <- '{ + "type": "Feature", + "properties": { + "fill": "#00f" + }, + "geometry": { + "type": "Polygon", + "coordinates": [[ + [-122.520217, 45.535693], + [-122.64038, 45.553967], + [-122.720031, 45.526554], + [-122.669906, 45.507309], + [-122.723464, 45.446643], + [-122.532577, 45.408574], + [-122.487258, 45.477466], + [-122.520217, 45.535693] + ]] + } +}' +view(poly1) +``` + +![union1](/public/img/2015-05-18-mow-the-lawn/union1.png) + + +```r +view(poly2) +``` + +![union2](/public/img/2015-05-18-mow-the-lawn/union2.png) + +Visualize union-ed polygons + + +```r +lawn_union(poly1, poly2) %>% view +``` + +![union3](/public/img/2015-05-18-mow-the-lawn/union3.png) + +See also `lawn_merge()` and `lawn_intersect()`. + +## lint input geojson + +For most functions, you can lint your input geojson data to make sure it is proper geojson. We use +the javascript library [geojsonhint](https://github.com/mapbox/geojsonhint). See the `lint` parameter. + +Good GeoJSON + + +```r +dat <- '{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { + "population": 200 + }, + "geometry": { + "type": "Point", + "coordinates": [10.724029, 59.926807] + } + } + ] +}' +lawn_extent(dat) +#> [1] 10.72403 59.92681 10.72403 59.92681 +``` + +Bad GeoJSON + + +```r +dat <- '{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": { + "population": 200 + }, + "geometry": { + "type": "Point" + } + } + ] +}' +lawn_extent(dat, lint = TRUE) + +#> Error: Line 1 - "coordinates" property required +``` + +## To do + +* As Turf.js changes, we'll update `lawn` +* Performance improvements. We realize that this package is slower than the C based `rgdal`/`rgeos` - we are looking into ways to increaes performance to get closer to the performance of those packages.

            - - the new way - httsnap + + geojsonio - a new package to do geojson things

            - + + + `geojsonio` converts geographic data to GeoJSON and TopoJSON formats - though the focus is mostly on GeoJSON + +For those not familiar GeoJSON it is a format for encoding a variety of geographic data structures. GeoJSON supports the following geometry types: Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, and GeometryCollection. These geometry types are also found in [well known text (WKT)](http://en.wikipedia.org/wiki/Well-known_text), and have equivalents in R's spatial classes. Read the [spec](http://geojson.org/geojson-spec.html) for more detailed information. + +Other great geojson resources: + +* GeoJSON lint - lint your geojson - [http://geojsonlint.com/](http://geojsonlint.com/) +* GeoJSON.io - make maps with geojson input or draw maps and get geojson - [http://geojson.io/](http://geojson.io/) + +Functions in this package are organized first around what you're working with or want to get, geojson or topojson, then convert to or read from various formats: + +* `geojson_list()` - convert to GeoJSON as R list format +* `geojson_json()` - convert to GeoJSON as json +* `geojson_read()`/`topojson_read()` - read a GeoJSON/TopoJSON file from file path or URL +* `geojson_write()` - write a GeoJSON file locally (no write TopoJSON yet) + +Each of the above functions have methods for various objects/classes, including `numeric`, `data.frame`, `list`, `SpatialPolygons`, `SpatialLines`, `SpatialPoints`, etc. (including the classes in `rgeos`) + +Use cases for this package include (but not limited to, obs.) the following: + +* Get data in GeoJSON json format, and you want to get it into a list in R. +* Get data into GeoJSON format to use downstream to make a interactive map + * in R (e.g., with [leaflet](https://github.com/rstudio/leaflet)) + * or in another context (e.g., using javascript with mapbox/leaflet) +* Data is in a data.frame/matrix/list and you want to make GeoJSON format data. +* Data is in one of the many spatial classes (e.g., `SpatialPoints`) and you want GeoJSON +* You need to add styling to your data - can do with this package for certain data types. +* You want to check that your GeoJSON data is valid - two ways to do it in geojsonio. +* Combine objects together (e.g., a point and a line), either from two `geo_list` objects, or two `json` objects. See `?geojson-add` + +## Install + +See the github repo for notes about dependencies [https://github.com/ropensci/geojsonio#install](https://github.com/ropensci/geojsonio#install). + +CRAN version or the dev version from GitHub + + +```r +install.packages("geojsonio") +devtools::install_github("sckott/geojsonio") +``` + + +```r +library("geojsonio") +``` + +## GeoJSON + +### Convert various formats to geojson -

            Inspired by httpie, a Python command line client that acts as a sort of drop-in replacement for curl, I am playing around with something similar-ish in R, at least in spirit. I started a little R pkg called httsnap with the following ideas:

            - -
              -
            • The web is increasingly a JSON world, so set content-type and accept headers to application/json by default
            • -
            • The workflow follows logically, or at least should, from hey, I got this url, to I need to add some options, to execute request
            • -
            • Whenever possible, transform output to data.frame's - facilitating downstream manipulation via dplyr, etc.
            • -
            • Do GET requests by default. Specify a different type if you don't want GET. Some functionality does GET by default, though in some cases you need to specify GET
            • -
            • You can use non-standard evaluation to easily pass in query parameters without worrying about &'s, URL escaping, etc. (see Query())
            • -
            • Same for body params (see Body())
            • -
            - -

            Install

            - -

            Install and load httsnap

            -
            devtools::install_github("sckott/httsnap")
            -
            library("httsnap")
            -library("dplyr")
            -
            -

            Functions so far

            - -
              -
            • Get - GET request
            • -
            • Query - add query parameters
            • -
            • Authenticate - add authentication details
            • -
            • Progress - add progress bar
            • -
            • Timeout - add a timeout
            • -
            • User_agent - add a user agent
            • -
            • Verbose - give verbose output
            • -
            • Body - add a body
            • -
            • h - add headers by key-value pair
            • -
            - -

            These are named to avoid conflict with httr

            - -

            Intro

            - -

            A simple GET request

            -
            "http://httpbin.org/get" %>%
            -  Get()
            -#> $args
            -#> named list()
            -#> 
            -#> $headers
            -#> $headers$Accept
            -#> [1] "application/json, text/xml, application/xml, */*"
            -#> 
            -#> $headers$`Accept-Encoding`
            -#> [1] "gzip"
            -#> 
            -#> $headers$Host
            -#> [1] "httpbin.org"
            -#> 
            -#> $headers$`User-Agent`
            -#> [1] "curl/7.37.1 Rcurl/1.95.4.1 httr/0.6.1 httsnap/0.0.2.99"
            -#> 
            -#> 
            -#> $origin
            -#> [1] "24.21.209.71"
            -#> 
            -#> $url
            -#> [1] "http://httpbin.org/get"
            -
            -

            You'll notice that Get() doesn't just get the response, but also checks whether it was a good response (the HTTP status code), and extracts the data.

            - -
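
            A rough sketch of that check-then-parse pattern with plain httr - illustrative only, not the actual httsnap internals (the get_json helper name is made up):

```r
library(httr)
library(jsonlite)

# hypothetical helper: request, fail loudly on a bad status code, then parse JSON
get_json <- function(url, ...) {
  res <- GET(url, accept_json(), ...)
  stop_for_status(res)                 # errors out on 4xx/5xx responses
  fromJSON(content(res, as = "text"))  # parse the JSON body into R objects
}

get_json("http://httpbin.org/get")
```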

            Or you can just pass the URL into the function itself

            -
            Get("http://httpbin.org/get")
            -#> $args
            -#> named list()
            -#> 
            -#> $headers
            -#> $headers$Accept
            -#> [1] "application/json, text/xml, application/xml, */*"
            -#> 
            -#> $headers$`Accept-Encoding`
            -#> [1] "gzip"
            -#> 
            -#> $headers$Host
            -#> [1] "httpbin.org"
            -#> 
            -#> $headers$`User-Agent`
            -#> [1] "curl/7.37.1 Rcurl/1.95.4.1 httr/0.6.1 httsnap/0.0.2.99"
            -#> 
            -#> 
            -#> $origin
            -#> [1] "24.21.209.71"
            -#> 
            -#> $url
            -#> [1] "http://httpbin.org/get"
            -
            -

            You can build up options by calling functions via pipes, and see what the options look like

            -
            "http://httpbin.org/get" %>%
            -  Progress() %>%
            -  Verbose()
            -#> <http request> 
            -#>   url: http://httpbin.org/get
            -#>   config: 
            -#> Config: 
            -#> List of 4
            -#>  $ noprogress      :FALSE
            -#>  $ progressfunction:function (...)  
            -#>  $ debugfunction   :function (...)  
            -#>  $ verbose         :TRUE
            -
            -

            Then execute the GET request when you're ready

            -
            "http://httpbin.org/get" %>%
            -  Progress() %>%
            -  Verbose() %>%
            -  Get()
            -#> $args
            -#> named list()
            -#> 
            -#> $headers
            -#> $headers$Accept
            -#> [1] "application/json, text/xml, application/xml, */*"
            -#> 
            -#> $headers$`Accept-Encoding`
            -#> [1] "gzip"
            -#> 
            -#> $headers$Host
            -#> [1] "httpbin.org"
            -#> 
            -#> $headers$`User-Agent`
            -#> [1] "curl/7.37.1 Rcurl/1.95.4.1 httr/0.6.1 httsnap/0.0.2.99"
            -#> 
            -#> 
            -#> $origin
            -#> [1] "24.21.209.71"
            -#> 
            -#> $url
            -#> [1] "http://httpbin.org/get"
            -
            -

            Example 1

            - -

            Get scholarly article metadata from the Crossref API

            -
            "http://api.crossref.org/works" %>%
            -  Query(query = "ecology") %>% 
            -  .$message %>% 
            -  .$items %>% 
            -  select(DOI, title, publisher)
            -#>                            DOI                title
            -#> 1          10.4996/fireecology         Fire Ecology
            -#> 2              10.5402/ecology         ISRN Ecology
            -#> 3                 10.1155/8641         ISRN Ecology
            -#> 4      10.1111/(issn)1526-100x  Restoration Ecology
            -#> 5        10.1007/248.1432-184x    Microbial Ecology
            -#> 6      10.1007/10144.1438-390x   Population Ecology
            -#> 7      10.1007/10452.1573-5125      Aquatic Ecology
            -#> 8      10.1007/10682.1573-8477 Evolutionary Ecology
            -#> 9      10.1007/10745.1572-9915        Human Ecology
            -#> 10     10.1007/10980.1572-9761    Landscape Ecology
            -#> 11     10.1007/11258.1573-5052        Plant Ecology
            -#> 12     10.1007/12080.1874-1746  Theoretical Ecology
            -#> 13     10.1111/(issn)1442-9993      Austral Ecology
            -#> 14     10.1111/(issn)1439-0485       Marine Ecology
            -#> 15     10.1111/(issn)1365-2435   Functional Ecology
            -#> 16     10.1111/(issn)1365-294x    Molecular Ecology
            -#> 17     10.1111/(issn)1461-0248      Ecology Letters
            -#> 18   10.1002/9780470979365.ch7  Behavioural Ecology
            -#> 19 10.1111/fec.2007.21.issue-5                     
            -#> 20     10.1111/rec.0.0.issue-0                     
            -#>                            publisher
            -#> 1       Association for Fire Ecology
            -#> 2     Hindawi Publishing Corporation
            -#> 3     Hindawi Publishing Corporation
            -#> 4                    Wiley-Blackwell
            -#> 5  Springer Science + Business Media
            -#> 6  Springer Science + Business Media
            -#> 7  Springer Science + Business Media
            -#> 8  Springer Science + Business Media
            -#> 9  Springer Science + Business Media
            -#> 10 Springer Science + Business Media
            -#> 11 Springer Science + Business Media
            -#> 12 Springer Science + Business Media
            -#> 13                   Wiley-Blackwell
            -#> 14                   Wiley-Blackwell
            -#> 15                   Wiley-Blackwell
            -#> 16                   Wiley-Blackwell
            -#> 17                   Wiley-Blackwell
            -#> 18                   Wiley-Blackwell
            -#> 19                   Wiley-Blackwell
            -#> 20                   Wiley-Blackwell
            -
            -

            Example 2

            - -

            Get Public Library of Science article metadata via their API, then make a histogram of the number of tweets for each article

            -
            "http://api.plos.org/search" %>%
            -  Query(q = "*:*", wt = "json", rows = 100, 
            -        fl = "id,journal,alm_twitterCount",  
            -        fq = 'alm_twitterCount:[100 TO 10000]') %>% 
            -  .$response %>% 
            -  .$docs %>% 
            -  .$alm_twitterCount %>% 
            -  hist()
            -
            -

            image

            - -

            Notes

            - -

            Okay, so this isn't drastically different from what httr already does, but it's early days.

            +From a `numeric` vector of length 2 + +as _json_ + + +```r +geojson_json(c(32.45, -99.74)) +#> {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[32.45,-99.74]},"properties":{}}]} +``` + +as a __list__ + + +```r +geojson_list(c(32.45, -99.74)) +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> $features[[1]] +#> $features[[1]]$type +#> [1] "Feature" +#> +#> $features[[1]]$geometry +#> $features[[1]]$geometry$type +... +``` + +From a `data.frame` + +as __json__ + + +```r +geojson_json(us_cities[1:2, ], lat = 'lat', lon = 'long') +#> {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[-99.74,32.45]},"properties":{"name":"Abilene TX","country.etc":"TX","pop":"113888","capital":"0"}},{"type":"Feature","geometry":{"type":"Point","coordinates":[-81.52,41.08]},"properties":{"name":"Akron OH","country.etc":"OH","pop":"206634","capital":"0"}}]} +``` + +as a __list__ + + +```r +geojson_list(us_cities[1:2, ], lat = 'lat', lon = 'long') +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> $features[[1]] +#> $features[[1]]$type +#> [1] "Feature" +#> +#> $features[[1]]$geometry +#> $features[[1]]$geometry$type +... +``` + +From `SpatialPolygons` class + + +```r +library('sp') +poly1 <- Polygons(list(Polygon(cbind(c(-100, -90, -85, -100), + c(40, 50, 45, 40)))), "1") +poly2 <- Polygons(list(Polygon(cbind(c(-90, -80, -75, -90), + c(30, 40, 35, 30)))), "2") +(sp_poly <- SpatialPolygons(list(poly1, poly2), 1:2)) +#> An object of class "SpatialPolygons" +#> Slot "polygons": +#> [[1]] +#> An object of class "Polygons" +#> Slot "Polygons": +#> [[1]] +#> An object of class "Polygon" +#> Slot "labpt": +#> [1] -91.66667 45.00000 +#> +... +``` + +to __json__ + + +```r +geojson_json(sp_poly) +#> {"type":"FeatureCollection","features":[{"type":"Feature","id":1,"properties":{"dummy":0},"geometry":{"type":"Polygon","coordinates":[[[-100,40],[-90,50],[-85,45],[-100,40]]]}},{"type":"Feature","id":2,"properties":{"dummy":0},"geometry":{"type":"Polygon","coordinates":[[[-90,30],[-80,40],[-75,35],[-90,30]]]}}]} +``` + +to a __list__ + + +```r +geojson_list(sp_poly) +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> $features[[1]] +#> $features[[1]]$type +#> [1] "Feature" +#> +#> $features[[1]]$id +#> [1] 1 +... +``` + +From `SpatialPoints` class + + +```r +x <- c(1, 2, 3, 4, 5) +y <- c(3, 2, 5, 1, 4) +(s <- SpatialPoints(cbind(x, y))) +#> SpatialPoints: +#> x y +#> [1,] 1 3 +#> [2,] 2 2 +#> [3,] 3 5 +#> [4,] 4 1 +#> [5,] 5 4 +#> Coordinate Reference System (CRS) arguments: NA +``` + +to __json__ + + +```r +geojson_json(s) +#> {"type":"FeatureCollection","features":[{"type":"Feature","id":1,"properties":{"dat":1},"geometry":{"type":"Point","coordinates":[1,3]}},{"type":"Feature","id":2,"properties":{"dat":2},"geometry":{"type":"Point","coordinates":[2,2]}},{"type":"Feature","id":3,"properties":{"dat":3},"geometry":{"type":"Point","coordinates":[3,5]}},{"type":"Feature","id":4,"properties":{"dat":4},"geometry":{"type":"Point","coordinates":[4,1]}},{"type":"Feature","id":5,"properties":{"dat":5},"geometry":{"type":"Point","coordinates":[5,4]}}]} +``` + +to a __list__ + + +```r +geojson_list(s) +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> $features[[1]] +#> $features[[1]]$type +#> [1] "Feature" +#> +#> $features[[1]]$id +#> [1] 1 +... 
+``` + +### Combine objects + +`geo_list` + `geo_list` + +> Note: `geo_list` is the output type from `geojson_list()`, it's just a list with a class attached so we know it's geojson :) + + +```r +vec <- c(-99.74, 32.45) +a <- geojson_list(vec) +vecs <- list(c(100.0, 0.0), c(101.0, 0.0), c(100.0, 0.0)) +b <- geojson_list(vecs, geometry = "polygon") +a + b +#> $type +#> [1] "FeatureCollection" +#> +#> $features +#> $features[[1]] +#> $features[[1]]$type +#> [1] "Feature" +#> +#> $features[[1]]$geometry +#> $features[[1]]$geometry$type +... +``` + +`json` + `json` + + +```r +c <- geojson_json(c(-99.74, 32.45)) +vecs <- list(c(100.0, 0.0), c(101.0, 0.0), c(101.0, 1.0), c(100.0, 1.0), c(100.0, 0.0)) +d <- geojson_json(vecs, geometry = "polygon") +c + d +#> {"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[-99.74,32.45]},"properties":{}},{"type":"Feature","geometry":{"type":"Polygon","coordinates":[[[100,0],[101,0],[101,1],[100,1],[100,0]]]},"properties":[]}]} +``` + +### Write geojson + + +```r +geojson_write(us_cities[1:2, ], lat = 'lat', lon = 'long') +#> +#> Path: myfile.geojson +#> From class: data.frame +``` + +## Topojson + +In the current version of this package you can read topojson. Writing topojson was in this package, but is gone for now - will come back later as in interface to [topojson](https://github.com/mbostock/topojson) via [V8](https://github.com/jeroenooms/V8). + +Read from a file + + +```r +file <- system.file("examples", "us_states.topojson", package = "geojsonio") +out <- topojson_read(file) +``` + +Read from a URL + + +```r +url <- "https://raw.githubusercontent.com/shawnbot/d3-cartogram/master/data/us-states.topojson" +out <- topojson_read(url) +``` + +## Lint geojson + +There are two ways to do this in this package. + +### lint, locally + +Uses the javascript library [geojsonhint](https://github.com/mapbox/geojsonhint) from Mapbox. We're running this locally via the [V8](http://cran.rstudio.com/web/packages/V8/) package. + +Good + + +```r +lint('{"type": "Point", "coordinates": [-100, 80]}') +#> [1] "valid" +``` + +Bad + + +```r +lint('{"type": "Rhombus", "coordinates": [[1, 2], [3, 4], [5, 6]]}') +#> $message +#> [1] "The type Rhombus is unknown" +#> +#> $line +#> [1] 1 +``` + +### validate, with a web service + +Uses the web service at [http://geojsonlint.com/](http://geojsonlint.com/) + +Good + + +```r +validate('{"type": "Point", "coordinates": [-100, 80]}') +#> $status +#> [1] "ok" +``` + +Bad + + +```r +validate('{"type": "Rhombus", "coordinates": [[1, 2], [3, 4], [5, 6]]}') +#> $message +#> [1] "\"Rhombus\" is not a valid GeoJSON type." +#> +#> $status +#> [1] "error" +``` + +## To do + +* I'd like to replace `rgdal` with javascript libraries to read from various file types (kml, shp, etc.) and convert to geojson. This is [in development](https://github.com/ropensci/geojsonio/tree/js), and will come in the next version of this package most likely. This should make installation a bit easier as we won't have to depend on `rgdal` and `GDAL` +* Performance improvements. Some operations already use the gdal or geos C libraries, so are quite fast, though the round trip to disk and back does take significant time. I'd like to speed this up. +* More input types. We already have operations (json, list, etc.) for lots of input types (data.frame, list, sp classes), but likely there will be more added. +* Most likely add functions `topojson_list()`, `topojson_json()`

            - - Faster solr with csv + + the new way - httsnap

            - - -

            With the help of user input, I've tweaked solr just a bit to make things faster using default settings. I imagine the main interface for people using the solr R client is via solr_search(), which used to have wt=json by default. Changing this to wt=csv gives better performance. And it sorta makes sense to use csv, as the point of using an R client is probably to get data into a data.frame eventually, so it makes sense to go with the csv format (already tabular) if it's faster too.

            - -

            Install

            - -

            Install and load solr

            -
            devtools::install_github("ropensci/solr")
            -
            library("solr")
            -library("microbenchmark")
            -
            -

            Setup

            - -

            Define base url and fields to return

            -
            url <- 'http://api.plos.org/search'
            -fields <- c('id','cross_published_journal_name','cross_published_journal_key',
            -            'cross_published_journal_eissn','pmid','pmcid','publisher','journal',
            -            'publication_date','article_type','article_type_facet','author',
            -            'author_facet','volume','issue','elocation_id','author_display',
            -            'competing_interest','copyright')
            -
            -

            json

            - -

            The previous default for solr_search() used json

            -
            solr_search(q='*:*', rows=10, fl=fields, base=url, wt = "json")
            -#> Source: local data frame [10 x 19]
            -#> 
            -#>                                                                    id
            -#> 1             10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4
            -#> 2       10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/title
            -#> 3    10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/abstract
            -#> 4  10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/references
            -#> 5        10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/body
            -#> 6             10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525
            -#> 7       10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/title
            -#> 8    10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/abstract
            -#> 9  10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/references
            -#> 10       10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/body
            -#> Variables not shown: cross_published_journal_name (chr),
            -#>   cross_published_journal_key (chr), cross_published_journal_eissn (chr),
            -#>   pmid (chr), pmcid (chr), publisher (chr), journal (chr),
            -#>   publication_date (chr), article_type (chr), article_type_facet (chr),
            -#>   author (chr), author_facet (chr), volume (int), issue (int),
            -#>   elocation_id (chr), author_display (chr), competing_interest (chr),
            -#>   copyright (chr)
            -
            -

            csv

            - -

            The default wt setting is now csv

            -
            -solr_search(q='*:*', rows=10, fl=fields, base=url, wt = "csv")
            -#> Source: local data frame [10 x 19]
            -#> 
            -#>                                                                    id
            -#> 1             10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4
            -#> 2       10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/title
            -#> 3    10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/abstract
            -#> 4  10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/references
            -#> 5        10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/body
            -#> 6             10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525
            -#> 7       10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/title
            -#> 8    10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/abstract
            -#> 9  10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/references
            -#> 10       10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/body
            -#> Variables not shown: cross_published_journal_name (chr),
            -#>   cross_published_journal_key (chr), cross_published_journal_eissn (chr),
            -#>   pmid (chr), pmcid (chr), publisher (chr), journal (chr),
            -#>   publication_date (chr), article_type (chr), article_type_facet (chr),
            -#>   author (chr), author_facet (chr), volume (int), issue (int),
            -#>   elocation_id (chr), author_display (chr), competing_interest (chr),
            -#>   copyright (chr)
            -
            -

            Compare times

            - -

            When parsing to a data.frame (which solr_search() does by default), csv is quite a bit faster.

            -
            microbenchmark(
            -  json = solr_search(q='*:*', rows=500, fl=fields, base=url, wt = "json", verbose = FALSE),
            -  csv = solr_search(q='*:*', rows=500, fl=fields, base=url, wt = "csv", verbose = FALSE), 
            -  times = 20
            -)
            -#> Unit: milliseconds
            -#>  expr      min       lq      mean    median        uq       max neval cld
            -#>  json 965.7043 1013.014 1124.1229 1086.3225 1227.9054 1441.8332    20   b
            -#>   csv 509.6573  520.089  541.5784  532.4546  548.0303  723.7575    20  a
            -
            -

            json vs xml vs csv

            - -

            When getting raw data, csv is best, json next, then xml pulling up the rear.

            -
            microbenchmark(
            -  json = solr_search(q='*:*', rows=1000, fl=fields, base=url, wt = "json", verbose = FALSE, raw = TRUE),
            -  csv = solr_search(q='*:*', rows=1000, fl=fields, base=url, wt = "csv", verbose = FALSE, raw = TRUE),
            -  xml = solr_search(q='*:*', rows=1000, fl=fields, base=url, wt = "xml", verbose = FALSE, raw = TRUE),
            -  times = 10
            -)
            -#> Unit: milliseconds
            -#>  expr       min       lq      mean    median        uq       max neval cld
            -#>  json 1110.9515 1142.478 1198.9981 1169.0808 1195.5709 1518.7412    10  b 
            -#>   csv  801.6871  802.516  826.0655  819.1532  835.0512  873.4266    10 a  
            -#>   xml 1507.1111 1554.002 1618.5963 1617.5208 1671.0026 1740.4448    10   c
            -
            -

            Notes

            - -

            Note that wt=csv is only available in solr_search() and solr_all() because the csv writer only returns the docs element, dropping other elements, including facets, mlt, groups, stats, etc.

            - -

            Also, note that the http client used in solr is httr, which passes in a gzip compression header by default, so as long as the server serving up the Solr data has compression turned on, that's all set.

            - -
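
            If you want to confirm that header yourself, httpbin echoes back the request headers - just a quick sanity check with httr, unrelated to solr itself:

```r
library(httr)
res <- GET("http://httpbin.org/headers")
content(res)$headers$`Accept-Encoding`  # should include "gzip"
```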

            Another speedup: when you use wt=json and then parse to a data.frame, the parsing now uses dplyr, which sped things up considerably.
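
            I haven't dug into the internals here, but a common pattern where dplyr helps is stacking one small data.frame per doc with bind_rows() instead of do.call("rbind", ...). A toy illustration (the docs list below is made up, not real solr output):

```r
library(dplyr)

# toy stand-in for parsed solr docs: one named list per document
docs <- replicate(1000,
                  list(id = "10.1371/example", journal = "PLOS ONE", volume = 10L),
                  simplify = FALSE)

# stack into a single tbl_df; much faster than do.call("rbind", ...) on many pieces
bind_rows(lapply(docs, as_data_frame))
```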

            + + + Inspired by `httpie`, a Python command line client as a sort of drop in replacement for `curl`, I am playing around with something similar-ish in R, at least in spirit. I started a little R pkg called `httsnap` with the following ideas: + +* The web is increasingly a JSON world, so set `content-type` and `accept` headers to `applications/json` by default +* The workflow follows logically, or at least should, from, _hey, I got this url_, to _i need to add some options_, to _execute request_ +* Whenever possible, transform output to data.frame's - facilitating downstream manipulation via `dplyr`, etc. +* Do `GET` requests by default. Specify a different type if you don't want `GET`. Some functionality does GET by default, though in some cases you need to specify GET +* You can use non-standard evaluation to easily pass in query parameters without worrying about `&`'s, URL escaping, etc. (see `Query()`) +* Same for body params (see `Body()`) + +## Install + +Install and load `httsnap` + + +```r +devtools::install_github("sckott/httsnap") +``` + + +```r +library("httsnap") +library("dplyr") +``` + +## Functions so far + +* `Get` - GET request +* `Query` - add query parameters +* `Authenticate` - add authentication details +* `Progress` - add progress bar +* `Timeout` - add a timeout +* `User_agent` - add a user agent +* `Verbose` - give verbose output +* `Body` - add a body +* `h` - add headers by key-value pair + +These are named to avoid conflict with `httr` + +## Intro + +A simple `GET` request + + +```r +"http://httpbin.org/get" %>% + Get() +#> $args +#> named list() +#> +#> $headers +#> $headers$Accept +#> [1] "application/json, text/xml, application/xml, */*" +#> +#> $headers$`Accept-Encoding` +#> [1] "gzip" +#> +#> $headers$Host +#> [1] "httpbin.org" +#> +#> $headers$`User-Agent` +#> [1] "curl/7.37.1 Rcurl/1.95.4.1 httr/0.6.1 httsnap/0.0.2.99" +#> +#> +#> $origin +#> [1] "24.21.209.71" +#> +#> $url +#> [1] "http://httpbin.org/get" +``` + +You'll notice that `Get()` doesn't just get the response, but also checks for whether it was a good response (the HTTP status code), and extracts the data. + +Or you can just pass the URL into the function itself + + +```r +Get("http://httpbin.org/get") +#> $args +#> named list() +#> +#> $headers +#> $headers$Accept +#> [1] "application/json, text/xml, application/xml, */*" +#> +#> $headers$`Accept-Encoding` +#> [1] "gzip" +#> +#> $headers$Host +#> [1] "httpbin.org" +#> +#> $headers$`User-Agent` +#> [1] "curl/7.37.1 Rcurl/1.95.4.1 httr/0.6.1 httsnap/0.0.2.99" +#> +#> +#> $origin +#> [1] "24.21.209.71" +#> +#> $url +#> [1] "http://httpbin.org/get" +``` + +You can buid up options by calling functions via pipes, and see what the options look like + + +```r +"http://httpbin.org/get" %>% + Progress() %>% + Verbose() +#> +#> url: http://httpbin.org/get +#> config: +#> Config: +#> List of 4 +#> $ noprogress :FALSE +#> $ progressfunction:function (...) +#> $ debugfunction :function (...) 
+#> $ verbose :TRUE +``` + +Then execute the GET request when you're ready + + +```r +"http://httpbin.org/get" %>% + Progress() %>% + Verbose() %>% + Get() +#> $args +#> named list() +#> +#> $headers +#> $headers$Accept +#> [1] "application/json, text/xml, application/xml, */*" +#> +#> $headers$`Accept-Encoding` +#> [1] "gzip" +#> +#> $headers$Host +#> [1] "httpbin.org" +#> +#> $headers$`User-Agent` +#> [1] "curl/7.37.1 Rcurl/1.95.4.1 httr/0.6.1 httsnap/0.0.2.99" +#> +#> +#> $origin +#> [1] "24.21.209.71" +#> +#> $url +#> [1] "http://httpbin.org/get" +``` + +## Example 1 + +Get scholarly article metadata from the Crossref API + + +```r +"http://api.crossref.org/works" %>% + Query(query = "ecology") %>% + .$message %>% + .$items %>% + select(DOI, title, publisher) +#> DOI title +#> 1 10.4996/fireecology Fire Ecology +#> 2 10.5402/ecology ISRN Ecology +#> 3 10.1155/8641 ISRN Ecology +#> 4 10.1111/(issn)1526-100x Restoration Ecology +#> 5 10.1007/248.1432-184x Microbial Ecology +#> 6 10.1007/10144.1438-390x Population Ecology +#> 7 10.1007/10452.1573-5125 Aquatic Ecology +#> 8 10.1007/10682.1573-8477 Evolutionary Ecology +#> 9 10.1007/10745.1572-9915 Human Ecology +#> 10 10.1007/10980.1572-9761 Landscape Ecology +#> 11 10.1007/11258.1573-5052 Plant Ecology +#> 12 10.1007/12080.1874-1746 Theoretical Ecology +#> 13 10.1111/(issn)1442-9993 Austral Ecology +#> 14 10.1111/(issn)1439-0485 Marine Ecology +#> 15 10.1111/(issn)1365-2435 Functional Ecology +#> 16 10.1111/(issn)1365-294x Molecular Ecology +#> 17 10.1111/(issn)1461-0248 Ecology Letters +#> 18 10.1002/9780470979365.ch7 Behavioural Ecology +#> 19 10.1111/fec.2007.21.issue-5 +#> 20 10.1111/rec.0.0.issue-0 +#> publisher +#> 1 Association for Fire Ecology +#> 2 Hindawi Publishing Corporation +#> 3 Hindawi Publishing Corporation +#> 4 Wiley-Blackwell +#> 5 Springer Science + Business Media +#> 6 Springer Science + Business Media +#> 7 Springer Science + Business Media +#> 8 Springer Science + Business Media +#> 9 Springer Science + Business Media +#> 10 Springer Science + Business Media +#> 11 Springer Science + Business Media +#> 12 Springer Science + Business Media +#> 13 Wiley-Blackwell +#> 14 Wiley-Blackwell +#> 15 Wiley-Blackwell +#> 16 Wiley-Blackwell +#> 17 Wiley-Blackwell +#> 18 Wiley-Blackwell +#> 19 Wiley-Blackwell +#> 20 Wiley-Blackwell +``` + +## Example 2 + +Get Public Library of Science article metadata via their API, make a histogram of number of tweets for each article + + +```r +"http://api.plos.org/search" %>% + Query(q = "*:*", wt = "json", rows = 100, + fl = "id,journal,alm_twitterCount", + fq = 'alm_twitterCount:[100 TO 10000]') %>% + .$response %>% + .$docs %>% + .$alm_twitterCount %>% + hist() +``` + +![image](/public/img/2015-04-29-the-new-way/unnamed-chunk-9-1.png) + +## Notes + +Okay, so this isn't drastically different from what `httr` already does, but its early days.
            diff --git a/_site/page50/index.html b/_site/page50/index.html index cbbc5e3438..93dfd53f21 100644 --- a/_site/page50/index.html +++ b/_site/page50/index.html @@ -59,6 +59,241 @@

            Recology

              +
            +

            + + plyr's idata.frame VS. data.frame + +

            + + + + ********* +I had seen the function idata.frame in plyr before, but not really tested it. From the plyr documentation: + +> _"An immutable data frame works like an ordinary data frame, except that when you subset it, it returns a reference to the original data frame, not a a copy. This makes subsetting substantially faster and has a big impact when you are working with large datasets with many groups."_ + +For example, although baseball is a data.frame, its immutable counterpart is a reference to it: + +{% highlight r %} +> idata.frame(baseball) + +attr(,"class") +[1] "idf" "environment" +{% endhighlight %} + +Here are a few comparisons of operations on normal data frames and immutable data frames. Immutable data frames don't work with the doBy package, but do work with aggregate in base functions.  Overall, the speed gains using idata.frame are quite impressive - I will use it more often for sure. + +Here's the comparisons of idata.frames and data.frames: + +********* + + +{% highlight r %} +# load packages +require(plyr) +require(reshape2) + +# Make immutable data frame +baseball_i <- idata.frame(baseball) +{% endhighlight %} + + +### Example 1 - idata.frame more than twice as fast +{% highlight r %} +system.time(replicate(50, ddply(baseball, "year", summarise, mean(rbi)))) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 8.509 0.266 8.798 +{% endhighlight %} + + + +{% highlight r %} +system.time(replicate(50, ddply(baseball_i, "year", summarise, mean(rbi)))) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 7.233 0.025 7.334 +{% endhighlight %} + + + +### Example 2 - Bummer, this does not work with idata.frame's +{% highlight r %} +colwise(max, is.numeric)(baseball) # works +{% endhighlight %} + + + +{% highlight text %} + year stint g ab r h X2b X3b hr rbi sb cs bb so ibb hbp sh sf +1 2007 4 165 705 177 257 64 28 73 NA NA NA 232 NA NA NA NA NA + gidp +1 NA +{% endhighlight %} + + + +{% highlight r %} +colwise(max, is.numeric)(baseball_i) # doesn't work +{% endhighlight %} + + + +{% highlight text %} +Error: is.data.frame(df) is not TRUE +{% endhighlight %} + + +### Example 3 - idata.frame twice as fast +{% highlight r %} +system.time(replicate(100, baseball[baseball$year == "1884", ])) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 1.329 0.035 1.378 +{% endhighlight %} + + + +{% highlight r %} +system.time(replicate(100, baseball_i[baseball_i$year == "1884", ])) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 0.674 0.015 0.689 +{% endhighlight %} + + +### Example 4 - idata.frame faster +{% highlight r %} +system.time(replicate(50, melt(baseball[, 1:4], id = 1))) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 7.129 0.506 7.691 +{% endhighlight %} + + + +{% highlight r %} +system.time(replicate(50, melt(baseball_i[, 1:4], id = 1))) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 0.852 0.162 1.015 +{% endhighlight %} + + +### And you can go back to a data frame by +{% highlight r %} +d <- as.data.frame(baseball_i) +str(d) +{% endhighlight %} + + + +{% highlight text %} +'data.frame': 21699 obs. of 22 variables: + $ id : chr "ansonca01" "forceda01" "mathebo01" "startjo01" ... + $ year : int 1871 1871 1871 1871 1871 1871 1871 1872 1872 1872 ... + $ stint: int 1 1 1 1 1 1 1 1 1 1 ... + $ team : chr "RC1" "WS3" "FW1" "NY2" ... + $ lg : chr "" "" "" "" ... + $ g : int 25 32 19 33 29 29 29 46 37 25 ... 
+ $ ab : int 120 162 89 161 128 146 145 217 174 130 ... + $ r : int 29 45 15 35 35 40 36 60 26 40 ... + $ h : int 39 45 24 58 45 47 37 90 46 53 ... + $ X2b : int 11 9 3 5 3 6 5 10 3 11 ... + $ X3b : int 3 4 1 1 7 5 7 7 0 0 ... + $ hr : int 0 0 0 1 3 1 2 0 0 0 ... + $ rbi : int 16 29 10 34 23 21 23 50 15 16 ... + $ sb : int 6 8 2 4 3 2 2 6 0 2 ... + $ cs : int 2 0 1 2 1 2 2 6 1 2 ... + $ bb : int 2 4 2 3 1 4 9 16 1 1 ... + $ so : int 1 0 0 0 0 1 1 3 1 0 ... + $ ibb : int NA NA NA NA NA NA NA NA NA NA ... + $ hbp : int NA NA NA NA NA NA NA NA NA NA ... + $ sh : int NA NA NA NA NA NA NA NA NA NA ... + $ sf : int NA NA NA NA NA NA NA NA NA NA ... + $ gidp : int NA NA NA NA NA NA NA NA NA NA ... +{% endhighlight %} + + +### idata.frame doesn't work with the doBy package +{% highlight r %} +require(doBy) +summaryBy(rbi ~ year, baseball_i, FUN = c(mean), na.rm = T) +{% endhighlight %} + + + +{% highlight text %} +Error: cannot coerce type 'environment' to vector of type 'any' +{% endhighlight %} + + +### But idata.frame works with aggregate in base (but with minimal speed gains) and aggregate is faster than ddply +{% highlight r %} +system.time(replicate(100, aggregate(rbi ~ year, baseball, mean))) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 4.998 0.346 5.373 +{% endhighlight %} + + + +{% highlight r %} +system.time(replicate(100, aggregate(rbi ~ year, baseball_i, mean))) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 4.745 0.283 5.045 +{% endhighlight %} + + + +{% highlight r %} +system.time(replicate(100, ddply(baseball_i, "year", summarise, mean(rbi)))) +{% endhighlight %} + + + +{% highlight text %} + user system elapsed + 13.293 0.042 13.428 +{% endhighlight %} + + +
            +

            @@ -68,7 +303,7 @@

            -

            I just realized that the gists code blocks don't show up in Google Reader, so you have to click the link to my blog to see the gists. Apologies for that!

            -S

            + I just realized that the gists code blocks don't show up in Google Reader, so you have to click the link to my blog to see the gists. Apologies for that!

            -S

            @@ -81,28 +316,15 @@

            -

            With all the packages (and beta stage groups of functions) for comparative phylogenetics in R (tested here: picante, geiger, ape, motmot, Liam Revell's functions), I was simply interested in which functions to use in cases where multiple functions exist to do the same thing. I only show default settings, so perhaps these functions would differ under different parameter settings.  [I am using a Mac 2.4 GHz i5, 4GB RAM]

            Get motmot here: 
            https://r-forge.r-project.org/R/?group_id=782
            Get Liam Revell's functions here: http://anolis.oeb.harvard.edu/~liam/R-phylogenetics/

            + With all the packages (and beta stage groups of functions) for comparative phylogenetics in R (tested here: picante, geiger, ape, motmot, Liam Revell's functions), I was simply interested in which functions to use in cases where multiple functions exist to do the same thing. I only show default settings, so perhaps these functions would differ under different parameter settings.  [I am using a Mac 2.4 GHz i5, 4GB RAM]

            Get motmot here: https://r-forge.r-project.org/R/?group_id=782
            Get Liam Revell's functions here: http://anolis.oeb.harvard.edu/~liam/R-phylogenetics/ -
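For readers who want to reproduce this kind of comparison, here is a minimal sketch (my addition) of the timing setup; the tree and trait are simulated, and ape's pic() stands in for whichever pair of equivalent functions you want to race:

{% highlight r %}
# Sketch only: simulate a tree and trait, then time one implementation.
# Swap in the equivalent function from picante/geiger/motmot to compare.
library(ape)

set.seed(1)
tree  <- rcoal(100)        # random ultrametric tree with 100 tips
trait <- rTraitCont(tree)  # Brownian-motion trait, named by tip label

system.time(replicate(100, pic(trait, tree)))
{% endhighlight %}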




            +


            -




            - -

            It's hard to pick an overall winner because not all functions are available in all packages, but there are definitely some functions that are faster than others.

            - -

            - -
            -

            - - RHIPE package in R for interfacing between Hadoop and R - -

            - - +


            -

            RHIPE: An Interface Between Hadoop and R
            Presented by Saptarshi Guha
            Video Link


            And this review of methods for interfacing with Hadoop suggests R's RHIPE is quite nice.

            +It's hard to pick an overall winner because not all functions are available in all packages, but there are definitely some functions that are faster than others.
            diff --git a/_site/page51/index.html b/_site/page51/index.html index aa6229cbbb..cb767d6e0b 100644 --- a/_site/page51/index.html +++ b/_site/page51/index.html @@ -59,6 +59,19 @@

            Recology

              +
            +

            + + RHIPE package in R for interfacing between Hadoop and R + +

            + + + +
            RHIPE: An Interface Between Hadoop and R
            Presented by Saptarshi Guha
            Video Link


            And this review of methods for interfacing with Hadoop suggests R's RHIPE is quite nice. + +
            +

            @@ -68,23 +81,23 @@

            -

            UPDATE: See Carl Boettiger's functions/package at Github for searching Treebase here.

            + UPDATE: See Carl Boettiger's functions/package at Github for searching Treebase here. -





            +



            -

            Treebase is a great resource for phylogenetic trees, and has a nice interface for searching for certain types of trees. However, if you want to simply download a lot of trees for analyses (like that in Davies et al.), then you want to be able to access trees in bulk (I believe Treebase folks are working on an API though). I wrote some simple code for extracting trees from Treebase.org.

It reads an XML file of (in this case consensus) URLs for each tree, parses the XML, makes a vector of URLs, reads the nexus files with error checking, removes trees that gave errors, then makes a simple plot looking at metrics of the trees.

            +Treebase is a great resource for phylogenetic trees, and has a nice interface for searching for certain types of trees. However, if you want to simply download a lot of trees for analyses (like that in Davies et al.), then you want to be able to access trees in bulk (I believe Treebase folks are working on an API though). I wrote some simple code for extracting trees from Treebase.org.

It reads an XML file of (in this case consensus) URLs for each tree, parses the XML, makes a vector of URLs, reads the nexus files with error checking, removes trees that gave errors, then makes a simple plot looking at metrics of the trees. -
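The error-checking step is the part most people ask about; a minimal sketch of that piece (my addition; the URLs are placeholders for the vector parsed out of the Treebase XML):

{% highlight r %}
# Sketch: try to read each nexus URL, keep only trees that parsed cleanly.
library(ape)

urls <- c("http://example.org/tree1.nex", "http://example.org/tree2.nex")  # placeholders
trees <- lapply(urls, function(u) tryCatch(read.nexus(u), error = function(e) NULL))
trees <- trees[!sapply(trees, is.null)]  # drop the ones that errored
length(trees)
{% endhighlight %}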



            +

            -

            Is there an easier way to do this?

            +Is there an easier way to do this? -





            +



            -


            +
            -

            +

            @@ -95,22 +108,9 @@

            - - -

            So perhaps you have all figured this out already, but I was excited to figure out how to finally neatly get all the data frames, lists, vectors, etc. out of a nested list. It is as easy as nesting calls to the apply family of functions, in the case below, using plyr's apply like functions. Take this example:



            # Nested lists code, an example
            # Make a nested list
            mylist <- list()
            mylist_ <- list()
            for(i in 1:5) {
            for(j in 1:5) {
            mylist[[j]] <- i*j
            }
mylist_[[i]] <- mylist
            }
             
            # return values from first part of list
laply(mylist_[[1]], identity)
            [1] 1 2 3 4 5
             
            # return all values
laply(mylist_, function(x) laply(x, identity))
            1 2 3 4 5
            [1,] 1 2 3 4 5
            [2,] 2 4 6 8 10
            [3,] 3 6 9 12 15
            [4,] 4 8 12 16 20
            [5,] 5 10 15 20 25
             
            # perform some function, in this case sqrt of each value
laply(mylist_, function(x) laply(x, function(x) sqrt(x)))
              
            1 2 3 4 5
            [1,] 1.000000 1.414214 1.732051 2.000000 2.236068
            [2,] 1.414214 2.000000 2.449490 2.828427 3.162278
            [3,] 1.732051 2.449490 3.000000 3.464102 3.872983
            [4,] 2.000000 2.828427 3.464102 4.000000 4.472136
            [5,] 2.236068 3.162278 3.872983 4.472136 5.000000


            Created by Pretty R at inside-R.org

            - -
            - -
            -

            - - Running Phylip's contrast application for trait pairs from R - -

            - - + -

Here is some code to run Phylip's contrast application from R and get the output within R to easily manipulate yourself. Importantly, the code is written specifically for trait pairs only, as the regular expression work in the code specifically grabs data from contrast results when only two traits are input. You could easily change the code to do N traits. Note that the p-value calculated for the chi-square statistic is not output from contrast, but is calculated within the function 'PhylipWithinSpContr'. In the code below there are two functions that make a lot of busy work easier: 'WritePhylip' and 'PhylipWithinSpContr'. The first function is nice because the formatting required for data input to Phylip programs is so, well, awkward - and this function does it for you. The second function runs contrast and retrieves the output data. The example data set I produce in the code below has multiple individuals per species, so that contrasts are calculated taking into account within species variation. Get Phylip's contrast documentation here.

            Note that the data input format allows only 10 characters for the species name, so I suggest if your species names are longer than 10 characters use the function abbreviate, for example, to shorten all names to no longer than 10 characters. Also, within the function WritePhylip I concatenate species names and their number of individuals per species leaving plenty of space.

Also, mess around with the options in the "system" call to get what you want. For example, I used "R", "W" and "Y", meaning replace old outfile (R), then turn on within species analyses (W), then accept all options (Y). E.g., if you don't have an old outfile, then you obviously don't need to replace the old file with the "R" command.

            (p.s. I have not tried this on a windows machine).




            Here is example output:


            > datout
            names2 dat...1. dat...2.
1 VarAIn_VarAest 0.000110 -0.000017
2 VarAIn_VarAest -0.000017 0.000155
3 VarAIn_VarEest 0.790783 -0.063097
4 VarAIn_VarEest -0.063097 0.981216
5 VarAIn_VarAreg 1.000000 -0.107200
6 VarAIn_VarAreg -0.151800 1.000000
7 VarAIn_VarAcorr 1.000000 -0.127600
8 VarAIn_VarAcorr -0.127600 1.000000
9 VarAIn_VarEreg 1.000000 -0.064300
10 VarAIn_VarEreg -0.079800 1.000000
11 VarAIn_VarEcorr 1.000000 -0.071600
12 VarAIn_VarEcorr -0.071600 1.000000
13 VarAOut_VarEest 0.790734 -0.063104
14 VarAOut_VarEest -0.063104 0.981169
15 VarAOut_VarEreg 1.000000 -0.064300
16 VarAOut_VarEreg -0.079800 1.000000
17 VarAOut_VarEcorr 1.000000 -0.071600
18 VarAOut_VarEcorr -0.071600 1.000000
19 logL_withvar_df -68.779770 6.000000
20 logL_withoutvar_df -68.771450 3.000000
21 chisq_df -0.016640 3.000000
22 chisq_p 1.000000 -999.000000

            + So perhaps you have all figured this out already, but I was excited to figure out how to finally neatly get all the data frames, lists, vectors, etc. out of a nested list. It is as easy as nesting calls to the apply family of functions, in the case below, using plyr's apply like functions. Take this example:



            # Nested lists code, an example
            # Make a nested list
            mylist <- list()
            mylist_ <- list()
            for(i in 1:5) {
            for(j in 1:5) {
            mylist[[j]] <- i*j
            }
            mylist_[[i]] <- mylist
            }
             
            # return values from first part of list
            laply(mylist_[[1]], identity)
            [1] 1 2 3 4 5
             
            # return all values
            laply(mylist_, function(x) laply(x, identity))
            1 2 3 4 5
            [1,] 1 2 3 4 5
            [2,] 2 4 6 8 10
            [3,] 3 6 9 12 15
            [4,] 4 8 12 16 20
            [5,] 5 10 15 20 25
             
            # perform some function, in this case sqrt of each value
            laply(mylist_, function(x) laply(x, function(x) sqrt(x)))
              
            1 2 3 4 5
            [1,] 1.000000 1.414214 1.732051 2.000000 2.236068
            [2,] 1.414214 2.000000 2.449490 2.828427 3.162278
            [3,] 1.732051 2.449490 3.000000 3.464102 3.872983
            [4,] 2.000000 2.828427 3.464102 4.000000 4.472136
            [5,] 2.236068 3.162278 3.872983 4.472136 5.000000


            Created by Pretty R at inside-R.org
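As an aside (my addition, not in the original post), base R can do the same flattening without plyr; rapply() visits every leaf of a nested list:

{% highlight r %}
# Sketch: the same nested list built with lapply, flattened with base R.
mylist_ <- lapply(1:5, function(i) as.list(i * 1:5))
rapply(mylist_, sqrt, how = "unlist")            # flat vector of square roots
t(sapply(mylist_, function(x) sqrt(unlist(x))))  # same values as a 5 x 5 matrix
{% endhighlight %}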
            diff --git a/_site/page52/index.html b/_site/page52/index.html index 2688dcb99a..c9befb25aa 100644 --- a/_site/page52/index.html +++ b/_site/page52/index.html @@ -61,40 +61,40 @@

            Recology

            - - Phylometa from R: Randomization via Tip Shuffle + + Running Phylip's contrast application for trait pairs from R

            - + -

            ---UPDATE: I am now using code formatting from gist.github, so I replaced the old prettyR code (sorry guys). The github way is much easier and prettier. I hope readers like the change.




            I wrote earlier about some code I wrote for running Phylometa (software to do phylogenetic meta-analysis) from R.

            I have been concerned about what exactly is the right penalty for including phylogeny in a meta-analysis. E.g.: AIC is calculated from Q in Phylometa, and Q increases with tree size.

            So, I wrote some code to shuffle the tips of your tree N number of times, run Phylometa, and extract just the "Phylogenetic MA" part of the output. So, we compare the observed output (without tip shuffling) to the distribution of the tip shuffled output, and we can calculate a P-value from that. The code I wrote simply extracts the pooled effect size for fixed and also random-effects models. But you could change the code to extract whatever you like for the randomization.

            I think the point of this code is not to estimate your pooled effects, etc., but may be an alternative way to compare traditional to phylogenetic MA where hopefully simply incorporating a tree is not penalizing the meta-analysis so much that you will always accept the traditional MA as better.

Get the code here, and also below. Get the example tree file and data file, named "phylogeny.txt" and "metadata_2g.txt", respectively below (or use your own data!). You need the file "phylometa_fxn.r" from my website, get here, but just call it using source as seen below.



            As you can see, the observed values fall well within the distribution of values obtained from shuffling tips.  P-values were 0.64 and 0.68 for fixed- and random-effects MA's, respectively. This suggests, to me at least, that the traditional (distribution of tip shuffled analyses, the histograms below) and phylogenetic (red lines) MA's are not different. The way I would use this is as an additional analysis to the actual Phylometa output.

+ Here is some code to run Phylip's contrast application from R and get the output within R to easily manipulate yourself. Importantly, the code is written specifically for trait pairs only, as the regular expression work in the code specifically grabs data from contrast results when only two traits are input. You could easily change the code to do N traits. Note that the p-value calculated for the chi-square statistic is not output from contrast, but is calculated within the function 'PhylipWithinSpContr'. In the code below there are two functions that make a lot of busy work easier: 'WritePhylip' and 'PhylipWithinSpContr'. The first function is nice because the formatting required for data input to Phylip programs is so, well, awkward - and this function does it for you. The second function runs contrast and retrieves the output data. The example data set I produce in the code below has multiple individuals per species, so that contrasts are calculated taking into account within species variation. Get Phylip's contrast documentation here.

            Note that the data input format allows only 10 characters for the species name, so I suggest if your species names are longer than 10 characters use the function abbreviate, for example, to shorten all names to no longer than 10 characters. Also, within the function WritePhylip I concatenate species names and their number of individuals per species leaving plenty of space.

Also, mess around with the options in the "system" call to get what you want. For example, I used "R", "W" and "Y", meaning replace old outfile (R), then turn on within species analyses (W), then accept all options (Y). E.g., if you don't have an old outfile, then you obviously don't need to replace the old file with the "R" command.

            (p.s. I have not tried this on a windows machine).




            Here is example output:


            > datout
            names2 dat...1. dat...2.
            1 VarAIn_VarAest 0.000110 -0.000017
            2 VarAIn_VarAest -0.000017 0.000155
            3 VarAIn_VarEest 0.790783 -0.063097
            4 VarAIn_VarEest -0.063097 0.981216
            5 VarAIn_VarAreg 1.000000 -0.107200
            6 VarAIn_VarAreg -0.151800 1.000000
            7 VarAIn_VarAcorr 1.000000 -0.127600
            8 VarAIn_VarAcorr -0.127600 1.000000
            9 VarAIn_VarEreg 1.000000 -0.064300
            10 VarAIn_VarEreg -0.079800 1.000000
            11 VarAIn_VarEcorr 1.000000 -0.071600
            12 VarAIn_VarEcorr -0.071600 1.000000
            13 VarAOut_VarEest 0.790734 -0.063104
            14 VarAOut_VarEest -0.063104 0.981169
            15 VarAOut_VarEreg 1.000000 -0.064300
            16 VarAOut_VarEreg -0.079800 1.000000
            17 VarAOut_VarEcorr 1.000000 -0.071600
            18 VarAOut_VarEcorr -0.071600 1.000000
            19 logL_withvar_df -68.779770 6.000000
            20 logL_withoutvar_df -68.771450 3.000000
            21 chisq_df -0.016640 3.000000
            22 chisq_p 1.000000 -999.000000

            - - RStudio Beta 2 is Out! + + Phylometa from R: Randomization via Tip Shuffle

            - + -

            RStudio Beta 2 (v0.93) « RStudio Blog


            A new beta version of RStudio is out!

            + ---UPDATE: I am now using code formatting from gist.github, so I replaced the old prettyR code (sorry guys). The github way is much easier and prettier. I hope readers like the change.




            I wrote earlier about some code I wrote for running Phylometa (software to do phylogenetic meta-analysis) from R.

            I have been concerned about what exactly is the right penalty for including phylogeny in a meta-analysis. E.g.: AIC is calculated from Q in Phylometa, and Q increases with tree size.

            So, I wrote some code to shuffle the tips of your tree N number of times, run Phylometa, and extract just the "Phylogenetic MA" part of the output. So, we compare the observed output (without tip shuffling) to the distribution of the tip shuffled output, and we can calculate a P-value from that. The code I wrote simply extracts the pooled effect size for fixed and also random-effects models. But you could change the code to extract whatever you like for the randomization.

            I think the point of this code is not to estimate your pooled effects, etc., but may be an alternative way to compare traditional to phylogenetic MA where hopefully simply incorporating a tree is not penalizing the meta-analysis so much that you will always accept the traditional MA as better.

            Get the code here, and also below. Get the example tree file and data file, named "phylogeny.txt" and "metadata_2g.txt", respectively below (or use your own data!). You need the file "phylometa_fxn.r" from my website, get here, but just call it using source as seen below.



            As you can see, the observed values fall well within the distribution of values obtained from shuffling tips.  P-values were 0.64 and 0.68 for fixed- and random-effects MA's, respectively. This suggests, to me at least, that the traditional (distribution of tip shuffled analyses, the histograms below) and phylogenetic (red lines) MA's are not different. The way I would use this is as an additional analysis to the actual Phylometa output.

            - - Adjust branch lengths with node ages: comparison of two methods + + RStudio Beta 2 is Out!

            - + -

            Here is an approach for comparing two methods of adjusting branch lengths on trees: bladj in the program Phylocom and a fxn written by Gene Hunt at the Smithsonian.

            Get the code and example files (tree and node ages) here
            Get phylocom here: http://www.phylodiversity.net/phylocom/

Gene Hunt's method has many options you can mess with, including setting tip ages (not available in bladj), setting node ages, and minimum branch length imposed. You will notice that Gene's method may not be appropriate if you only have extant taxa.

The function AdjBrLens takes as input a newick tree file and a text file of node ages, and uses functions you can simply run by calling "source" on the R file bladjing_twomethods.R from here.

Note that bladj does not like numbers for node names, so you have to put a character in front of a number, or use just character names for nodes.



            # This is where the work happens... 
            # Directory below needs to have at least three items:
            # 1. phylocom executable for windows or mac
            # 2. tree newick file
            # 3. node ages file as required by phylocom, see their manual
# Output: trees_out is a list of three trees, the original, bladj, and Gene Hunt's method
# Also, within the function all three trees are written to file as PDFs
setwd("/Mac/R_stuff/Blog_etc/Bladjing") # set working directory
source("bladjing_twomethods.R") # run functions from source file
trees_out <- AdjBrLens("tree.txt", "nodeages.txt")
 
# plot trees of three methods together,
# with nodes with age estimates labeled
jpeg("threeplots.jpeg", quality=100)
layout(matrix(1:3, 1, 3))
plot(trees_out[[1]])
nodelabels(trees_out[[1]]$node.label, cex = 0.6)
title("original tree")
plot(trees_out[[2]])
nodelabels(trees_out[[2]]$node.label, cex = 0.6)
title("bladj method")
plot(trees_out[[3]])
nodelabels(trees_out[[3]]$node.label, cex = 0.6)
            title("gene hunt method, scalePhylo")
            dev.off()
            Created by Pretty R at inside-R.org


            It is sort of hard to see the differences in the branch length changes here, but the individual output trees will reveal the differences better.

            + RStudio Beta 2 (v0.93) « RStudio Blog

            A new beta version of RStudio is out!
            diff --git a/_site/page53/index.html b/_site/page53/index.html index 9a88dfb4cf..595e882f44 100644 --- a/_site/page53/index.html +++ b/_site/page53/index.html @@ -61,182 +61,40 @@

            Recology

- - Phylometa from R - UPDATE + + Adjust branch lengths with node ages: comparison of two methods

            - + -

            A while back I posted some messy code to run Phylometa from R, especially useful for processing the output data from Phylometa which is not easily done. The code is still quite messy, but it should work now. I have run the code with tens of different data sets and phylogenies so it should work.


            I fixed errors when parentheses came up against numbers in the output, and other things. You can use the code for up to 4 levels of your grouping variable. In addition, there are some lines of code to plot the effect sizes with confidence intervals, comparing random and fixed effects models and phylogenetic and traditional models. 

            Get the code at my website:
-<a href="https://gist.github.com/939971" target="blank">phylometainRrun.R
            - Use the first file to do the work, calling the second file using source().
            - This new code works with Marc's new version of Phylometa, so please update: http://lajeunesse.myweb.usf.edu/publications.html

            Again, please let me know if it doesn't work, if it's worthless, what changes could make it better.

            Some notes on tree formatting for Phylometa.
1. Trees cannot have node labels - remove them (e.g., tree$node.label <- NULL).
            2. Trees cannot have zero length branches. This may seem like a non-problem, but it might be for example if you have resolved polytomies and zero length branches are added to resolve the polytomy.
            3. I think you cannot have a branch length on the root branch.

            + Here is an approach for comparing two methods of adjusting branch lengths on trees: bladj in the program Phylocom and a fxn written by Gene Hunt at the Smithsonian.

            Get the code and example files (tree and node ages) here
            Get phylocom here: http://www.phylodiversity.net/phylocom/

Gene Hunt's method has many options you can mess with, including setting tip ages (not available in bladj), setting node ages, and minimum branch length imposed. You will notice that Gene's method may not be appropriate if you only have extant taxa.

The function AdjBrLens takes as input a newick tree file and a text file of node ages, and uses functions you can simply run by calling "source" on the R file bladjing_twomethods.R from here.

Note that bladj does not like numbers for node names, so you have to put a character in front of a number, or use just character names for nodes.



            # This is where the work happens... 
            # Directory below needs to have at least three items:
            # 1. phylocom executable for windows or mac
            # 2. tree newick file
            # 3. node ages file as required by phylocom, see their manual
            # Output: trees_out is a list of three trees, the original, bladj, and Gene Hunt's method
            # Also, within the function all three trees are written to file as PDFs
            setwd("/Mac/R_stuff/Blog_etc/Bladjing") # set working directory
            source("bladjing_twomethods.R") # run functions from source file
            trees_out <- AdjBrLens("tree.txt", "nodeages.txt")
             
            # plot trees of three methods together,
            # with nodes with age estimates labeled
            jpeg("threeplots.jpeg", quality=100)
            layout(matrix(1:3, 1, 3))
            plot(trees_out[[1]])
            nodelabels(trees_out[[1]]$node.label, cex = 0.6)
            title("original tree")
            plot(trees_out[[2]])
            nodelabels(trees_out[[2]]$node.label, cex = 0.6)
            title("bladj method")
            plot(trees_out[[3]])
            nodelabels(trees_out[[3]]$node.label, cex = 0.6)
            title("gene hunt method, scalePhylo")
            dev.off()
            Created by Pretty R at inside-R.org


            It is sort of hard to see the differences in the branch length changes here, but the individual output trees will reveal the differences better.

- - Bio-ORACLE + + Phylometa from R - UPDATE

            - + -

            Bio-ORACLE


            A new dataset available of geophysical, biotic and climate data. Should be fun to play with in R.

            + A while back I posted some messy code to run Phylometa from R, especially useful for processing the output data from Phylometa which is not easily done. The code is still quite messy, but it should work now. I have run the code with tens of different data sets and phylogenies so it should work.

            I fixed errors when parentheses came up against numbers in the output, and other things. You can use the code for up to 4 levels of your grouping variable. In addition, there are some lines of code to plot the effect sizes with confidence intervals, comparing random and fixed effects models and phylogenetic and traditional models. 

            Get the code at my website:
            - Use the first file to do the work, calling the second file using source().
            - This new code works with Marc's new version of Phylometa, so please update: http://lajeunesse.myweb.usf.edu/publications.html

            Again, please let me know if it doesn't work, if it's worthless, what changes could make it better.

            Some notes on tree formatting for Phylometa.
1. Trees cannot have node labels - remove them (e.g., tree$node.label <- NULL).
            2. Trees cannot have zero length branches. This may seem like a non-problem, but it might be for example if you have resolved polytomies and zero length branches are added to resolve the polytomy.
            3. I think you cannot have a branch length on the root branch.
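A minimal sketch of those three clean-up steps with ape (my addition; the small epsilon used to replace zero-length branches is an arbitrary choice):

{% highlight r %}
# Sketch: tidy a phylo object before writing it out for Phylometa.
library(ape)

prep_tree_for_phylometa <- function(tree, eps = 1e-6) {
  tree$node.label <- NULL                         # 1. drop node labels
  tree$edge.length[tree$edge.length == 0] <- eps  # 2. no zero-length branches
  tree$root.edge <- NULL                          # 3. no branch length on the root
  tree
}
{% endhighlight %}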

            - - basic ggplot2 network graphs ver2 + + Bio-ORACLE

            - - -
            - -

            I posted last week a simple function to plot networks using ggplot2 package. Here is version 2. I still need to work on figuring out efficient vertex placement.

            - -

            Changes in version 2:

            - -
              -
            • You have one of three options: use an igraph object, a matrix, or a dataframe (matrices will be converted to data frames within the function)
            • -
            • If you have data on food webs similar to that provided in the Takapoto dataset provided in the NetIndices package, you can set trophic = "TRUE", and gggraph will use the function TrophInd to assign trophic levels (the y axis value) to each vertex/node. You have to provide additional information along with this option such as what the imports and exports are, see NetIndices documentation.
            • -
            • I added some simple error checking.
            • -
            • if using method="df" and trophic="FALSE", x axis placement of vertices is now done using the function degreex (see inside the fxn), which sorts vertices according to their degree (so the least connected species are on the left of the graph; note that species with the same degree are not stacked on the y-axis because e.g., two vertices of degree=5 would get x=3 then x=4).
            • -
            - -
            - -
            # ggraph Version 2
            -require(bipartite)
            -require(igraph)
            -require(ggplot2)
            - -
            # gggraph, version 3 g = an igraph graph object, a matrix, or data frame
            -# vplace = type of vertex placement assignment, one of rnorm, runif, etc.
            -# method = one of 'df' for data frame, 'mat' for matrix or 'igraph' for an
-# igraph graph object trophic = TRUE or FALSE for using NetIndices
-# function TrophInd to determine trophic level (y value in graph)
-# trophinames = columns in matrix or dataframe to use for calculating
-# trophic level import = named or referenced by col# columns of matrix or
-# dataframe to use for import argument of TrophInd export = named or
-# referenced by col# columns of matrix or dataframe to use for export
-# argument of TrophInd dead = named or referenced by col# columns of matrix
            -# or dataframe to use for dead argument of TrophInd
            -
            -gggraph <- function(g, vplace = rnorm, method, trophic = "FALSE", trophinames, 
            -    import, export) {
            -    degreex <- function(x) {
            -        degreecol <- apply(x, 2, function(y) length(y[y > 0]))
            -        degreerow <- apply(x, 1, function(y) length(y[y > 0]))
            -        degrees <- sort(c(degreecol, degreerow))
            -        df <- data.frame(degrees, x = seq(1, length(degrees), 1))
            -        df$value <- rownames(df)
            -        df
            -    }
            -    # require igraph
            -    if (!require(igraph)) 
            -        stop("must first install 'igraph' package.")
            -    # require ggplot2
            -    if (!require(ggplot2)) 
            -        stop("must first install 'ggplot2' package.")
            -    
            -    if (method == "df") {
            -        if (class(g) == "matrix") {
            -            g <- as.data.frame(g)
            -        }
            -        if (class(g) != "data.frame") 
            -            stop("object must be of class 'data.frame.'")
            -        if (trophic == "FALSE") {
            -            # data preparation from adjacency matrix
            -            temp <- data.frame(expand.grid(dimnames(g))[1:2], as.vector(as.matrix(g)))
            -            temp <- temp[(temp[, 3] > 0) & !is.na(temp[, 3]), ]
            -            temp <- temp[sort.list(temp[, 1]), ]
            -            g_df <- data.frame(rows = temp[, 1], cols = temp[, 2], freqint = temp[, 
            -                3])
            -            
            -            g_df$id <- 1:length(g_df[, 1])
            -            g_df <- data.frame(id = g_df[, 4], rows = g_df[, 1], cols = g_df[, 
            -                2], freqint = g_df[, 3])
            -            g_df_ <- melt(g_df, id = c(1, 4))
            -            
            -            xy_s <- data.frame(degreex(g), y = rnorm(length(unique(g_df_$value))))
            -            g_df_2 <- merge(g_df_, xy_s, by = "value")
            -        } else if (trophic == "TRUE") {
            -            # require NetIndices
            -            if (!require(NetIndices)) 
            -                stop("must first install 'NetIndices' package.")
            -            # data preparation from adjacency matrix
            -            temp <- data.frame(expand.grid(dimnames(g[-trophinames, -trophinames]))[1:2], 
            -                as.vector(as.matrix(g[-trophinames, -trophinames])))
            -            temp <- temp[(temp[, 3] > 0) & !is.na(temp[, 3]), ]
            -            temp <- temp[sort.list(temp[, 1]), ]
            -            g_df <- data.frame(rows = temp[, 1], cols = temp[, 2], freqint = temp[, 
            -                3])
            -            
            -            g_df$id <- 1:length(g_df[, 1])
            -            g_df <- data.frame(id = g_df[, 4], rows = g_df[, 1], cols = g_df[, 
            -                2], freqint = g_df[, 3])
            -            g_df_ <- melt(g_df, id = c(1, 4))
            -            
            -            xy_s <- data.frame(value = unique(g_df_$value), x = rnorm(length(unique(g_df_$value))), 
            -                y = TrophInd(g, Import = import, Export = export)[, 1])
            -            g_df_2 <- merge(g_df_, xy_s, by = "value")
            -        }
            -        # plotting
            -        p <- ggplot(g_df_2, aes(x, y)) + geom_point(size = 5) + geom_line(aes(size = freqint, 
            -            group = id)) + geom_text(size = 3, hjust = 1.5, aes(label = value)) + 
            -            theme_bw() + opts(panel.grid.major = theme_blank(), panel.grid.minor = theme_blank(), 
            -            axis.text.x = theme_blank(), axis.text.y = theme_blank(), axis.title.x = theme_blank(), 
            -            axis.title.y = theme_blank(), axis.ticks = theme_blank(), panel.border = theme_blank(), 
            -            legend.position = "none")
            -        
            -        p  # return graph
            -    } else if (method == "igraph") {
            -        if (class(g) != "igraph") 
            -            stop("object must be of class 'igraph.'")
            -        # data preparation from igraph object
            -        g_ <- get.edgelist(g)
            -        g_df <- as.data.frame(g_)
            -        g_df$id <- 1:length(g_df[, 1])
            -        g_df <- melt(g_df, id = 3)
            -        xy_s <- data.frame(value = unique(g_df$value), x = vplace(length(unique(g_df$value))), 
            -            y = vplace(length(unique(g_df$value))))
            -        g_df2 <- merge(g_df, xy_s, by = "value")
            -        
            -        # plotting
            -        p <- ggplot(g_df2, aes(x, y)) + geom_point(size = 2) + geom_line(size = 0.3, 
            -            aes(group = id, linetype = id)) + geom_text(size = 3, hjust = 1.5, 
            -            aes(label = value)) + theme_bw() + opts(panel.grid.major = theme_blank(), 
            -            panel.grid.minor = theme_blank(), axis.text.x = theme_blank(), axis.text.y = theme_blank(), 
            -            axis.title.x = theme_blank(), axis.title.y = theme_blank(), axis.ticks = theme_blank(), 
            -            panel.border = theme_blank(), legend.position = "none")
            -        
            -        p  # return graph
            -    } else stop(paste("do not recognize method = \"", method, "\";\nmethods are \"df\" and \"igraph\"", 
            -        sep = ""))
            -}
            - -
            # Eg
            -library(NetIndices)
            -data(Takapoto)
            -gggraph(Takapoto, vplace = rnorm, method = "df", trophic = "TRUE", trophinames = c(8:10), 
            -    import = "CO2", export = c("CO2", "Sedimentation", "Grazing"))
            - -

            center

            - -
            plants <- round(rlnorm(n = 5, meanlog = 2, sdlog = 1))
            -animals <- round(rlnorm(n = 5, meanlog = 2, sdlog = 1))
            -plants <- plants * (100/sum(plants))
            -animals <- animals * (100/sum(animals))
            -z <- r2dtable(1, animals, plants)  # if errors, rerun again until no error
            -z <- as.data.frame(z[[1]])
            -rownames(z) <- c("a", "b", "c", "d", "e")
            -gggraph(z, vplace = rnorm, method = "df", trophic = "FALSE")
            + -

            center

            + Bio-ORACLE

            A new dataset available of geophysical, biotic and climate data. Should be fun to play with in R.
            diff --git a/_site/page54/index.html b/_site/page54/index.html index 5e35a8e0c1..bb3e2ee7df 100644 --- a/_site/page54/index.html +++ b/_site/page54/index.html @@ -59,6 +59,176 @@

            Recology

              +
            +

            + + basic ggplot2 network graphs ver2 + +

            + + + + ********* + +I posted last week a simple function to plot networks using ggplot2 package. Here is version 2. I still need to work on figuring out efficient vertex placement. + +Changes in version 2: + ++ You have one of three options: use an igraph object, a matrix, or a dataframe (matrices will be converted to data frames within the function) ++ If you have data on food webs similar to that provided in the Takapoto dataset provided in the NetIndices package, you can set trophic = "TRUE", and gggraph will use the function TrophInd to assign trophic levels (the y axis value) to each vertex/node. You have to provide additional information along with this option such as what the imports and exports are, see NetIndices documentation. ++ I added some simple error checking. ++ if using method="df" and trophic="FALSE", x axis placement of vertices is now done using the function degreex (see inside the fxn), which sorts vertices according to their degree (so the least connected species are on the left of the graph; note that species with the same degree are not stacked on the y-axis because e.g., two vertices of degree=5 would get x=3 then x=4). + +********* + + +{% highlight r %} +# ggraph Version 2 +require(bipartite) +require(igraph) +require(ggplot2) +{% endhighlight %} + + + +{% highlight r %} +# gggraph, version 3 g = an igraph graph object, a matrix, or data frame +# vplace = type of vertex placement assignment, one of rnorm, runif, etc. +# method = one of 'df' for data frame, 'mat' for matrix or 'igraph' for an +# igraph graph object trophic = TRUE or FALSE for using Netindices +# function TrophInd to determine trophic level (y value in graph) +# trophinames = columns in matrix or dataframe to use for calculating +# trophic level import = named or refereced by col# columns of matrix or +# dataframe to use for import argument of TrophInd export = named or +# refereced by col# columns of matrix or dataframe to use for export +# argument of TrophInd dead = named or refereced by col# columns of matrix +# or dataframe to use for dead argument of TrophInd + +gggraph <- function(g, vplace = rnorm, method, trophic = "FALSE", trophinames, + import, export) { + degreex <- function(x) { + degreecol <- apply(x, 2, function(y) length(y[y > 0])) + degreerow <- apply(x, 1, function(y) length(y[y > 0])) + degrees <- sort(c(degreecol, degreerow)) + df <- data.frame(degrees, x = seq(1, length(degrees), 1)) + df$value <- rownames(df) + df + } + # require igraph + if (!require(igraph)) + stop("must first install 'igraph' package.") + # require ggplot2 + if (!require(ggplot2)) + stop("must first install 'ggplot2' package.") + + if (method == "df") { + if (class(g) == "matrix") { + g <- as.data.frame(g) + } + if (class(g) != "data.frame") + stop("object must be of class 'data.frame.'") + if (trophic == "FALSE") { + # data preparation from adjacency matrix + temp <- data.frame(expand.grid(dimnames(g))[1:2], as.vector(as.matrix(g))) + temp <- temp[(temp[, 3] > 0) & !is.na(temp[, 3]), ] + temp <- temp[sort.list(temp[, 1]), ] + g_df <- data.frame(rows = temp[, 1], cols = temp[, 2], freqint = temp[, + 3]) + + g_df$id <- 1:length(g_df[, 1]) + g_df <- data.frame(id = g_df[, 4], rows = g_df[, 1], cols = g_df[, + 2], freqint = g_df[, 3]) + g_df_ <- melt(g_df, id = c(1, 4)) + + xy_s <- data.frame(degreex(g), y = rnorm(length(unique(g_df_$value)))) + g_df_2 <- merge(g_df_, xy_s, by = "value") + } else if (trophic == "TRUE") { + # require NetIndices + if (!require(NetIndices)) + stop("must first install 
'NetIndices' package.") + # data preparation from adjacency matrix + temp <- data.frame(expand.grid(dimnames(g[-trophinames, -trophinames]))[1:2], + as.vector(as.matrix(g[-trophinames, -trophinames]))) + temp <- temp[(temp[, 3] > 0) & !is.na(temp[, 3]), ] + temp <- temp[sort.list(temp[, 1]), ] + g_df <- data.frame(rows = temp[, 1], cols = temp[, 2], freqint = temp[, + 3]) + + g_df$id <- 1:length(g_df[, 1]) + g_df <- data.frame(id = g_df[, 4], rows = g_df[, 1], cols = g_df[, + 2], freqint = g_df[, 3]) + g_df_ <- melt(g_df, id = c(1, 4)) + + xy_s <- data.frame(value = unique(g_df_$value), x = rnorm(length(unique(g_df_$value))), + y = TrophInd(g, Import = import, Export = export)[, 1]) + g_df_2 <- merge(g_df_, xy_s, by = "value") + } + # plotting + p <- ggplot(g_df_2, aes(x, y)) + geom_point(size = 5) + geom_line(aes(size = freqint, + group = id)) + geom_text(size = 3, hjust = 1.5, aes(label = value)) + + theme_bw() + opts(panel.grid.major = theme_blank(), panel.grid.minor = theme_blank(), + axis.text.x = theme_blank(), axis.text.y = theme_blank(), axis.title.x = theme_blank(), + axis.title.y = theme_blank(), axis.ticks = theme_blank(), panel.border = theme_blank(), + legend.position = "none") + + p # return graph + } else if (method == "igraph") { + if (class(g) != "igraph") + stop("object must be of class 'igraph.'") + # data preparation from igraph object + g_ <- get.edgelist(g) + g_df <- as.data.frame(g_) + g_df$id <- 1:length(g_df[, 1]) + g_df <- melt(g_df, id = 3) + xy_s <- data.frame(value = unique(g_df$value), x = vplace(length(unique(g_df$value))), + y = vplace(length(unique(g_df$value)))) + g_df2 <- merge(g_df, xy_s, by = "value") + + # plotting + p <- ggplot(g_df2, aes(x, y)) + geom_point(size = 2) + geom_line(size = 0.3, + aes(group = id, linetype = id)) + geom_text(size = 3, hjust = 1.5, + aes(label = value)) + theme_bw() + opts(panel.grid.major = theme_blank(), + panel.grid.minor = theme_blank(), axis.text.x = theme_blank(), axis.text.y = theme_blank(), + axis.title.x = theme_blank(), axis.title.y = theme_blank(), axis.ticks = theme_blank(), + panel.border = theme_blank(), legend.position = "none") + + p # return graph + } else stop(paste("do not recognize method = \"", method, "\";\nmethods are \"df\" and \"igraph\"", + sep = "")) +} +{% endhighlight %} + + + +{% highlight r %} +# Eg +library(NetIndices) +data(Takapoto) +gggraph(Takapoto, vplace = rnorm, method = "df", trophic = "TRUE", trophinames = c(8:10), + import = "CO2", export = c("CO2", "Sedimentation", "Grazing")) +{% endhighlight %} + +![center](/public/img/gggraph1.png) + + + +{% highlight r %} +plants <- round(rlnorm(n = 5, meanlog = 2, sdlog = 1)) +animals <- round(rlnorm(n = 5, meanlog = 2, sdlog = 1)) +plants <- plants * (100/sum(plants)) +animals <- animals * (100/sum(animals)) +z <- r2dtable(1, animals, plants) # if errors, rerun again until no error +z <- as.data.frame(z[[1]]) +rownames(z) <- c("a", "b", "c", "d", "e") +gggraph(z, vplace = rnorm, method = "df", trophic = "FALSE") +{% endhighlight %} + +![center](/public/img/gggraph2.png) + + + +
            +

            @@ -68,18 +238,18 @@

            -

            I have been looking around on the web and have not found anything yet related to using ggplot2 for making graphs/networks. I put together a few functions to make very simple graphs. The bipartite function especially is not ideal, as of course we only want to allow connections between unlike nodes, not all nodes. These functions do not, obviously, take full advantage of the power of ggplot2, but it’s a start.

            + I have been looking around on the web and have not found anything yet related to using ggplot2 for making graphs/networks. I put together a few functions to make very simple graphs. The bipartite function especially is not ideal, as of course we only want to allow connections between unlike nodes, not all nodes. These functions do not, obviously, take full advantage of the power of ggplot2, but it’s a start. -
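The basic idea behind those functions fits in a few lines; a minimal sketch (my addition, using a made-up edge list and theme_void() rather than the opts()/theme_blank() calls of the original):

{% highlight r %}
# Sketch: give each node a coordinate, then draw edges as segments and nodes as points.
library(ggplot2)

edges <- data.frame(from = c("a", "a", "b", "c"), to = c("b", "c", "d", "d"))
nodes <- data.frame(name = unique(c(edges$from, edges$to)))
nodes$x <- rnorm(nrow(nodes))
nodes$y <- rnorm(nrow(nodes))

# attach xy coordinates to both ends of every edge
seg <- merge(edges, nodes, by.x = "from", by.y = "name")
seg <- merge(seg, nodes, by.x = "to", by.y = "name", suffixes = c("", "end"))

ggplot() +
  geom_segment(data = seg, aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_point(data = nodes, aes(x, y), size = 4) +
  geom_text(data = nodes, aes(x, y, label = name), hjust = 1.5, size = 3) +
  theme_void()
{% endhighlight %}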




            +


            -

            + -

            +

            @@ -92,20 +262,7 @@

            -

            A post over at the Phased blog (http://www.nasw.org/users/mslong/) highlights a recent paper in PLoS One by Robert Warren et al. Similar results were obtained in a 2007 Ecology Letters paper by Nekola and Brown, who showed that abundance distributions found in ecology are similar to those found for scientific citations, Eastern North American precipitation, among other things. A similar argument was made by Nee et al. in 1991 (in the journal PRSL-B). The author of the blog appears to agree with the outcome of the Warren et al. study.

            I tend to disagree.

            In the field of graphs/networks, many networks (social, sexual intercourse among humans, etc.) are found to have similar statistical properties to those of ecological networks (food webs, interactions among mutualists, etc.). However, just because these networks have similar statistical properties does not mean that the statistical properties of ecological networks have no biological meaning.

They make the argument that the common SAD fit may be an artifact of large data sets alone. However, I don't see any explanation of why they think large data sets are a valid explanation of SADs. Surely SADs are fit to varying sizes of datasets. The problem with small datasets is lack of statistical power to detect a particular pattern, but surely you can get a fit for a particular SAD to a small dataset.

            There are ecological mechanistic theories behind different SAD models. They argue that because very similar SADs are found in ecological and non-ecological datasets alike one option is that a universal mechanism structures ecological and non-ecological data (with the mechanism unknown in both). Why can't the same SAD pattern be generated by different mechanisms?

            Are Warren et al, Nekola, and Nee right in questioning the utility of SADs? Questioning our theories and ideas only makes the theories better in the end by weeding out shortcomings, etc.


            ResearchBlogging.org
            Warren, R., Skelly, D., Schmitz, O., & Bradford, M. (2011). Universal Ecological Patterns in College Basketball Communities PLoS ONE, 6 (3) DOI: 10.1371/journal.pone.0017342

            - -

            - -
            -

            - - cloudnumbers.com - -

            - - - -

            UPDATE: I guess it still is not actually available. Bummer...



            Has anyone used cloudnumbers.com?

            http://www.cloudnumbers.com/

            They provide cloud computing, and have built in applications, including R.

            How well does it work? Does it increase processing speed? I guess it may at the least free up RAM and processor space on your own machine.

            + A post over at the Phased blog (http://www.nasw.org/users/mslong/) highlights a recent paper in PLoS One by Robert Warren et al. Similar results were obtained in a 2007 Ecology Letters paper by Nekola and Brown, who showed that abundance distributions found in ecology are similar to those found for scientific citations, Eastern North American precipitation, among other things. A similar argument was made by Nee et al. in 1991 (in the journal PRSL-B). The author of the blog appears to agree with the outcome of the Warren et al. study.

            I tend to disagree.

            In the field of graphs/networks, many networks (social, sexual intercourse among humans, etc.) are found to have similar statistical properties to those of ecological networks (food webs, interactions among mutualists, etc.). However, just because these networks have similar statistical properties does not mean that the statistical properties of ecological networks have no biological meaning.

They make the argument that the common SAD fit may be an artifact of large data sets alone. However, I don't see any explanation of why they think large data sets are a valid explanation of SADs. Surely SADs are fit to varying sizes of datasets. The problem with small datasets is lack of statistical power to detect a particular pattern, but surely you can get a fit for a particular SAD to a small dataset.

            There are ecological mechanistic theories behind different SAD models. They argue that because very similar SADs are found in ecological and non-ecological datasets alike one option is that a universal mechanism structures ecological and non-ecological data (with the mechanism unknown in both). Why can't the same SAD pattern be generated by different mechanisms?

            Are Warren et al, Nekola, and Nee right in questioning the utility of SADs? Questioning our theories and ideas only makes the theories better in the end by weeding out shortcomings, etc.


            ResearchBlogging.org
            Warren, R., Skelly, D., Schmitz, O., & Bradford, M. (2011). Universal Ecological Patterns in College Basketball Communities PLoS ONE, 6 (3) DOI: 10.1371/journal.pone.0017342
            diff --git a/_site/page55/index.html b/_site/page55/index.html index 29103fd584..20a76cd2b9 100644 --- a/_site/page55/index.html +++ b/_site/page55/index.html @@ -61,40 +61,40 @@

            Recology

            - - Five ways to visualize your pairwise comparisons + + cloudnumbers.com

            - + -

            UPDATE: At the bottom are two additional methods, and some additions (underlined) are added to the original 5 methods. Thanks for all the feedback...
            -Also, another post here about ordered-categorical data
            -Also #2, a method combining splom and hexbin packages here, for larger datasets


            In data analysis it is often nice to look at all pairwise combinations of continuous variables in scatterplots. Up until recently, I have used the function splom in the package lattice, but ggplot2 has superior aesthetics, I think anyway.

Here are a few ways to accomplish the task:

            # load packages

            require(lattice)
            require(ggplot2) 

            1) Using base graphics, function "pairs"
            pairs(iris[1:4], pch = 21)
            Created by Pretty R at inside-R.org





















            2) Using lattice package, function "splom"

            -Additional code to improve splom plots here (and see Oscar's code below in comments)

            splom(~iris[1:4])
            Created by Pretty R at inside-R.org





















            3) Using package ggplot2, function "plotmatrix"

            plotmatrix(iris[1:4])
            Created by Pretty R at inside-R.org





















            4) a function called ggcorplot by Mike Lawrence at Dalhousie University

            -get ggcorplot function at this link
            -ggcorplot is also built in to Deducer (get here); see Ian's code below in the comments
            -Lastly, an improved version of ggcorplot is built in to the ez package (get here)

            ggcorplot(
            data = iris[1:4],
var_text_size = 5,
cor_text_limits = c(5,10))
            Created by Pretty R at inside-R.org





















            5) panel.cor function using pairs, similar to ggcorplot, but using base graphics. Not sure who wrote this function, but here is where I found it.
            panel.cor <- function(x, y, digits=2, prefix="", cex.cor) 
            {
            usr <- par("usr"); on.exit(par(usr))
            par(usr = c(0, 1, 0, 1))
            r <- abs(cor(x, y))
            txt <- format(c(r, 0.123456789), digits=digits)[1]
            txt <- paste(prefix, txt, sep="")
            if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
             
            test <- cor.test(x,y)
            # borrowed from printCoefmat
            Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
            cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
symbols = c("***", "**", "*", ".", " "))
             
            text(0.5, 0.5, txt, cex = cex * r)
            text(.8, .8, Signif, cex=cex, col=2)
            }
             
            pairs(iris[1:4], lower.panel=panel.smooth, upper.panel=panel.cor)
            Created by Pretty R at inside-R.org




















            A comparison of run times...

            > system.time(pairs(iris[1:4]))
            user system elapsed
            0.138 0.008 0.156
            > system.time(splom(~iris[1:4]))
            user system elapsed
            0.003 0.000 0.003
            > system.time(plotmatrix(iris[1:4]))
            user system elapsed
            0.052 0.000 0.052
            > system.time(ggcorplot(
            + data = iris[1:4],
var_text_size = 5,
cor_text_limits = c(5,10)))
             
            user system elapsed
            0.130 0.001 0.131
            > system.time(pairs(iris[1:4], lower.panel=panel.smooth, upper.panel=panel.cor))
            user system elapsed
            0.170 0.011 0.200
            Created by Pretty R at inside-R.org

            ...shows that splom is the fastest method, with the method using the panel.cor function pulling up the rear.



            6) given by a reader in the comments (get her/his code here). This one is nice as it gives 95% CI's for the correlation coefficients, AND histograms of each variable.




7) a reader in the comments suggested the scatterplotMatrix (spm can be used) function in the car package. This one has the advantage of plotting distributions of each variable, and providing fits to the data with confidence intervals.

            spm(iris[1:4])









            + UPDATE: I guess it still is not actually available. Bummer...



            Has anyone used cloudnumbers.com?

            http://www.cloudnumbers.com/

            They provide cloud computing, and have built in applications, including R.

            How well does it work? Does it increase processing speed? I guess it may at the least free up RAM and processor space on your own machine.

            - - Check out Phyloseminar.org + + Five ways to visualize your pairwise comparisons

            - + -
            They have online seminars that you can join in on live, and watch later as recorded videos. Check it out at: phyloseminar.org home
            + UPDATE: At the bottom are two additional methods, and some additions (underlined) are added to the original 5 methods. Thanks for all the feedback...
            -Also, another post here about ordered-categorical data
            -Also #2, a method combining splom and hexbin packages here, for larger datasets


            In data analysis it is often nice to look at all pairwise combinations of continuous variables in scatterplots. Up until recently, I have used the function splom in the package lattice, but ggplot2 has superior aesthetics, I think anyway.

Here are a few ways to accomplish the task:

            # load packages
            require(lattice)
            require(ggplot2) 

            1) Using base graphics, function "pairs"
            pairs(iris[1:4], pch = 21)
            Created by Pretty R at inside-R.org





















            2) Using lattice package, function "splom"

            -Additional code to improve splom plots here (and see Oscar's code below in comments)

            splom(~iris[1:4])
            Created by Pretty R at inside-R.org

            3) Using package ggplot2, function "plotmatrix"

            plotmatrix(iris[1:4])
            Created by Pretty R at inside-R.org
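            A quick aside for anyone running this today: plotmatrix() was deprecated and later removed from ggplot2. A common replacement is ggpairs() from the GGally package; a minimal sketch, assuming GGally is installed:

```r
# plotmatrix() is gone from current ggplot2; GGally::ggpairs() is a common stand-in
library(GGally)
ggpairs(iris[1:4])
```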

            4) a function called ggcorplot by Mike Lawrence at Dalhousie University

            -get ggcorplot function at this link
            -ggcorplot is also built in to Deducer (get here); see Ian's code below in the comments
            -Lastly, an improved version of ggcorplot is built in to the ez package (get here)

            ggcorplot(
            data = iris[1:4],
            var_text_size = 5,
            cor_text_limits = c(5,10))
            Created by Pretty R at inside-R.org

            5) panel.cor function using pairs, similar to ggcorplot, but using base graphics. Not sure who wrote this function, but here is where I found it.
            panel.cor <- function(x, y, digits=2, prefix="", cex.cor) 
            {
            usr <- par("usr"); on.exit(par(usr))
            par(usr = c(0, 1, 0, 1))
            r <- abs(cor(x, y))
            txt <- format(c(r, 0.123456789), digits=digits)[1]
            txt <- paste(prefix, txt, sep="")
            if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
             
            test <- cor.test(x,y)
            # borrowed from printCoefmat
            Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
            cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1),
            symbols = c("***", "**", "*", ".", " "))
             
            text(0.5, 0.5, txt, cex = cex * r)
            text(.8, .8, Signif, cex=cex, col=2)
            }
             
            pairs(iris[1:4], lower.panel=panel.smooth, upper.panel=panel.cor)
            Created by Pretty R at inside-R.org

            A comparison of run times...

            > system.time(pairs(iris[1:4]))
            user system elapsed
            0.138 0.008 0.156
            > system.time(splom(~iris[1:4]))
            user system elapsed
            0.003 0.000 0.003
            > system.time(plotmatrix(iris[1:4]))
            user system elapsed
            0.052 0.000 0.052
            > system.time(ggcorplot(
            + data = iris[1:4],
            var_text_size = 5,
            cor_text_limits = c(5,10)))
             
            user system elapsed
            0.130 0.001 0.131
            > system.time(pairs(iris[1:4], lower.panel=panel.smooth, upper.panel=panel.cor))
            user system elapsed
            0.170 0.011 0.200
            Created by Pretty R at inside-R.org

            ...shows that splom is the fastest method, with the method using the panel.cor function pulling up the rear.
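            One caveat on these timings: a single system.time() call is noisy, and lattice functions such as splom() only build a trellis object (the drawing happens when that object is printed), which likely explains why splom looks so fast here. A sketch of a fairer comparison with the microbenchmark package, assuming the packages and the panel.cor function above are already loaded:

```r
library(microbenchmark)
microbenchmark(
  pairs     = pairs(iris[1:4]),
  splom     = print(splom(~iris[1:4])),  # print() forces the actual drawing
  panel.cor = pairs(iris[1:4], lower.panel = panel.smooth, upper.panel = panel.cor),
  times = 10
)
```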



            6) Given by a reader in the comments (get her/his code here). This one is nice as it gives 95% CIs for the correlation coefficients, and histograms of each variable.
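            A minimal sketch of that idea (not the reader's original code): a pairs() panel that prints the correlation with its 95% CI from cor.test(), plus a histogram panel for the diagonal (adapted from the examples in ?pairs):

```r
panel.cor.ci <- function(x, y, ...) {
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(0, 1, 0, 1))
  ct <- cor.test(x, y)  # Pearson correlation with a 95% confidence interval
  txt <- sprintf("r = %.2f\n[%.2f, %.2f]", ct$estimate, ct$conf.int[1], ct$conf.int[2])
  text(0.5, 0.5, txt)
}
panel.hist <- function(x, ...) {
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5))
  h <- hist(x, plot = FALSE)
  rect(h$breaks[-length(h$breaks)], 0, h$breaks[-1], h$counts/max(h$counts), col = "grey")
}
pairs(iris[1:4], upper.panel = panel.cor.ci, diag.panel = panel.hist)
```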




            7) A reader in the comments suggested the scatterplotMatrix function (spm is a shorter alias) in the car package. This one has the advantage of plotting distributions of each variable, and providing fits to the data with confidence intervals.

            spm(iris[1:4])

            - - RStudio + + Check out Phyloseminar.org

            - + -

            New thoughts: After actually using it more, it is quite nice, but I have a couple of major issues.
            1. The text editor is quite slow to scroll through.
            2. ggplot2 graphics look bad, worse than if just running R alone.

            RStudio


            Everyone seems to be excited about this...

            Is it any good? Seems great for folks just learning R, but perhaps less ideal for advanced R users?

            +
            They have online seminars that you can join in on live, and watch later as recorded videos. Check it out at: phyloseminar.org home
            diff --git a/_site/page56/index.html b/_site/page56/index.html index 4fe1426da5..35c3a27267 100644 --- a/_site/page56/index.html +++ b/_site/page56/index.html @@ -61,40 +61,40 @@

            Recology

            - - R overtakes SAS in popularity + + RStudio

            - + -

            TIOBE Software: Tiobe Index

            + New thoughts: After actually using it more, it is quite nice, but I have a couple of major issues.
            1. The text editor is quite slow to scroll through.
            2. ggplot2 graphics look bad, worse than if just running R alone.

            RStudio

            Everyone seems to be excited about this...

            Is it any good? Seems great for folks just learning R, but perhaps less ideal for advanced R users?

            - - Phenotypic selection analysis in R + + R overtakes SAS in popularity

            - + -

            Up until recently I have always done my phenotypic selection analyses in SAS. I finally have some code that I think does everything SAS would do. Feedback much appreciated!



            ########################Selection analyses#############################
            install.packages(c("car","reshape","ggplot2"))
            require(car)
            require(reshape)
            require(ggplot2)
             
            # Create data set
            dat <- data.frame(plant = seq(1,100,1),
            trait1 = rep(c(0.1,0.15,0.2,0.21,0.25,0.3,0.5,0.6,0.8,0.9,1,3,4,10,11,12,13,14,15,16), each = 5), trait2 = runif(100),
            fitness = rep(c(1,5,10,20,50), each = 20))
             
            # Make relative fitness column
            dat_ <- cbind(dat, dat$fitness/mean(dat$fitness))
            names(dat_)[5] <- "relfitness"
             
            # Standardize traits
            dat_ <- cbind(dat_[,-c(2:3)], rescaler(dat_[,c(2:3)],"sd"))
             
            ####Selection differentials and correlations among traits, cor.prob uses function in functions.R file
            ############################################################################
            ####### Function for calculating correlation matrix, corrs below diagonal,
            ####### and P-values above diagonal
            ############################################################################
            cor.prob <- function(X, dfr = nrow(X) - 2) {
            R <- cor(X)
            above <- row(R) < col(R)
            r2 <- R[above]^2
            Fstat <- r2 * dfr / (1 - r2)
            R[above] <- 1 - pf(Fstat, 1, dfr)
            R
            }
             
            # Get selection differentials and correlations among traits in one data frame
            dat_seldiffs <- cov(dat_[,c(3:5)]) # calculates sel'n differentials using cov
            dat_selcorrs <- cor.prob(dat_[,c(3:5)]) # use P-values above diagonal for significance of sel'n differentials in dat_seldiffs
            dat_seldiffs_selcorrs <- data.frame(dat_seldiffs, dat_selcorrs) # combine the two
             
            ##########################################################################
            ####Selection gradients
            dat_selngrad <- lm(relfitness ~ trait1 * trait2, data = dat_)
            summary(dat_selngrad) # where "Estimate" is our sel'n gradient
             
            ####Check assumptions
            shapiro.test(dat_selngrad$residuals) # normality, bummer, non-normal
            hist(dat_selngrad$residuals) # plot residuals
            vif(dat_selngrad) # check variance inflation factors (need package car), everything looks fine
            plot(dat_selngrad) # cycle through diagnostic plots
             
            ############################################################################
            # Plot data
            ggplot(dat_, aes(trait1, relfitness)) +
            geom_point() +
            geom_smooth(method = "lm") +
            labs(x="Trait 1",y="Relative fitness")
            ggsave("myplot.jpeg")
            Created by Pretty R at inside-R.org


            Plot of relative fitness vs. trait 1 standardized

            + TIOBE Software: Tiobe Index

            - - Phylogenetic analysis with the phangorn package: an example + + Phenotypic selection analysis in R

            - + -

            The phangorn package is a relatively new package in R for the analysis and comparison of phylogenies. See here for the Bioinformatics paper and here for the package. Here is an example of using phangorn from getting sequences to making phylogenies and visualizing them:

            Getting sequences from Genbank



            Multiple alignment



            Maximum likelihood tree reconstruction



            Visualizing trees



            Visualizing trees and traits
            Make fake traits:

            Visualize them on trees:





            + Up until recently I have always done my phenotypic selection analyses in SAS. I finally have some code that I think does everything SAS would do. Feedback much appreciated!



            ########################Selection analyses#############################
            install.packages(c("car","reshape","ggplot2"))
            require(car)
            require(reshape)
            require(ggplot2)
             
            # Create data set
            dat <- data.frame(plant = seq(1,100,1),
            trait1 = rep(c(0.1,0.15,0.2,0.21,0.25,0.3,0.5,0.6,0.8,0.9,1,3,4,10,11,12,13,14,15,16), each = 5), trait2 = runif(100),
            fitness = rep(c(1,5,10,20,50), each = 20))
             
            # Make relative fitness column
            dat_ <- cbind(dat, dat$fitness/mean(dat$fitness))
            names(dat_)[5] <- "relfitness"
             
            # Standardize traits
            dat_ <- cbind(dat_[,-c(2:3)], rescaler(dat_[,c(2:3)],"sd"))
             
            ####Selection differentials and correlations among traits, cor.prob uses function in functions.R file
            ############################################################################
            ####### Function for calculating correlation matrix, corrs below diagonal,
            ####### and P-values above diagonal
            ############################################################################
            cor.prob <- function(X, dfr = nrow(X) - 2) {
            R <- cor(X)
            above <- row(R) < col(R)
            r2 <- R[above]^2
            Fstat <- r2 * dfr / (1 - r2)
            R[above] <- 1 - pf(Fstat, 1, dfr)
            R
            }
             
            # Get selection differentials and correlations among traits in one data frame
            dat_seldiffs <- cov(dat_[,c(3:5)]) # calculates sel'n differentials using cov
            dat_selcorrs <- cor.prob(dat_[,c(3:5)]) # use P-values above diagonal for significance of sel'n differentials in dat_seldiffs
            dat_seldiffs_selcorrs <- data.frame(dat_seldiffs, dat_selcorrs) # combine the two
             
            ##########################################################################
            ####Selection gradients
            dat_selngrad <- lm(relfitness ~ trait1 * trait2, data = dat_)
            summary(dat_selngrad) # where "Estimate" is our sel'n gradient
             
            ####Check assumptions
            shapiro.test(dat_selngrad$residuals) # normality, bummer, non-normal
            hist(dat_selngrad$residuals) # plot residuals
            vif(dat_selngrad) # check variance inflation factors (need package car), everything looks fine
            plot(dat_selngrad) # cycle through diagnostic plots
             
            ############################################################################
            # Plot data
            ggplot(dat_, aes(trait1, relfitness)) +
            geom_point() +
            geom_smooth(method = "lm") +
            labs(x="Trait 1",y="Relative fitness")
            ggsave("myplot.jpeg")
            Created by Pretty R at inside-R.org
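            One small note on the standardization step: if the reshape package (and its rescaler()) isn't handy, base R's scale() also gives mean-0, unit-variance traits, which is the usual standardization for selection gradients. A hedged alternative to the rescaler() line above:

```r
# in place of the rescaler() line; scale() centers each trait and divides by its sd
dat_ <- cbind(dat_[,-c(2:3)], scale(dat_[,c(2:3)]))
```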


            Plot of relative fitness vs. trait 1 standardized

            diff --git a/_site/page57/index.html b/_site/page57/index.html index 23743faf81..4bf40af4bb 100644 --- a/_site/page57/index.html +++ b/_site/page57/index.html @@ -61,48 +61,40 @@

            Recology

            - - Farmer's markets data + + Phylogenetic analysis with the phangorn package: an example

            - + -

            I combined USDA data on farmer's markets in the US with population census data to get an idea of the disparity in farmers markets by state, and then also expressed per capita.

            Download USDA data here. The formatted file I used below is here (in excel format, although I read into R as csv file). The census data is read from url as below.

            California has by far the most farmer's markets in absolute numbers, but Vermont takes the cake in markets per capita. Iowa comes in a distant second behind Vermont in markets per capita.



            The code:

            ######## Farmer's Markets #############
            setwd("/Mac/Rstuff/Blogetc/USDAFarmersMarkets") # Set to your working directory, this is where you want to call files from and write files to
            install.packages(c("ggplot2", "RCurl")) # install all packags required below
            require(ggplot2) # plyr is libraried along with ggplot2, as ggplot2 uses plyr (as well as package reshape) functions
             
            # read market data
            markets <- read.csv("farmmarkets.csv")
            markets$state <- as.factor(gsub("Wyoming ", "Wyoming", markets$LocAddState)) # there was a typo for Wyoming
            markets <- na.omit(markets)
            str(markets)
             
            # read population census data
            popcen <- read.csv("http://www.census.gov/popest/national/files/NST_EST2009_ALLDATA.csv")
            popcen <- popcen[,c(4,5,6,17)]
            str(popcen)
             
            # summarize
            markets_ <- ddply(markets, .(state), summarise,
            markets_n = length(LocAddState)
            )
             
            markets_pop_ <- merge(markets_, popcen[,-1], by.x = "state", by.y = "NAME") # merge two data sets
            markets_pop_$marketspercap <- markets_pop_$markets_n/markets_pop_$POPESTIMATE2009 # create column of markets per capita
            markets_pop_$markets_n_st <- markets_pop_$markets_n/max(markets_pop_$markets_n)
            markets_pop_$marketspercap_st <- markets_pop_$marketspercap/max(markets_pop_$marketspercap)
             
            # plot
            ggplot(melt(markets_pop_[,-c(2:5)]), aes(x = state, y = value, fill = variable)) +
            geom_bar(position = "dodge") +
            coord_flip()
            ggsave("fmarkets_barplot.jpeg")
            Created by Pretty R at inside-R.org

            Note: the x-axis here is standardized value of number of markets (markets_n_st) and number of markets per capita (marketspercap_st).


            # maps
            try_require("maps")
            states <- map_data("state")
            markets_pop_$statelow <- tolower(markets_pop_$state)
            survey_sum_map <- merge(states, markets_pop_, by.x = "region", by.y = "statelow")
            survey_sum_map <- survey_sum_map[order(survey_sum_map$order), ]
            str(survey_sum_map)
             
            qplot(long, lat, data = survey_sum_map, group = group, fill = markets_n, geom = "polygon", main = "Total farmer's markets") +
            scale_fill_gradient(low="green", high="black")
            ggsave("fmarkets_map_green.jpeg")




            <a href="http://2.bp.blogspot.com/-I-Hqg4GtJs0/TVyE3I7BmYI/AAAAAAAAEaI/xNqBq4BqemI/s1600/fmarkets
            mapgreen.jpeg" imageanchor="1" style="clear: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="452" src="http://2.bp.blogspot.com/-I-Hqg4GtJs0/TVyE3I7BmYI/AAAAAAAAEaI/xNqBq4BqemI/s640/fmarketsmapgreen.jpeg" style="cursor: move;" width="640" />


            qplot(long, lat, data = survey_sum_map, group = group, fill = marketspercap, geom = "polygon", main = "Farmer's markets per person") +
            scale_fill_gradient(low="green", high="black")
             
            ggsave("fmarkerspercap_map_green.jpeg")


            <a href="http://4.bp.blogspot.com/-g8zapywmu7M/TVyE2qNzFnI/AAAAAAAAEaE/yrjk5txjFgo/s1600/fmarkerspercapmapgreen.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="453" src="http://4.bp.blogspot.com/-g8zapywmu7M/TVyE2qNzFnI/AAAAAAAAEaE/yrjk5txjFgo/s640/fmarkerspercapmapgreen.jpeg" width="640" />

            <a href="http://2.bp.blogspot.com/-I-Hqg4GtJs0/TVyE3I7BmYI/AAAAAAAAEaI/xNqBq4BqemI/s1600/fmarketsmapgreen.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;">

            <a href="http://4.bp.blogspot.com/-ceVMLE6yfbk/TVyE31U6LTI/AAAAAAAAEaM/PM2LCHnLPMM/s1600/fmarketsbarplot.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;">

            + The phangorn package is a relatively new package in R for the analysis and comparison of phylogenies. See here for the Bioinformatics paper and here for the package. Here is an example of using phangorn from getting sequences to making phylogenies and visualizing them:

            Getting sequences from Genbank



            Multiple alignment



            Maximum likelihood tree reconstruction



            Visualizing trees



            Visualizing trees and traits
            Make fake traits:

            Visualize them on trees:
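            Since the embedded code isn't shown here, below is a minimal sketch of the kind of workflow the post describes, using the Laurasiatherian alignment that ships with phangorn in place of sequences pulled from GenBank (so it runs without the download and alignment steps):

```r
library(phangorn)                      # also attaches ape
data(Laurasiatherian)                  # example phyDat alignment included with phangorn
dm   <- dist.ml(Laurasiatherian)       # maximum likelihood distances
tree <- NJ(dm)                         # neighbor-joining starting tree
fit  <- pml(tree, data = Laurasiatherian)               # likelihood object
fit  <- optim.pml(fit, model = "GTR", optGamma = TRUE)  # optimize the ML tree
plot(fit$tree)                         # visualize the resulting phylogeny
```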





            - - Troubling news for the teaching of evolution + + Farmer's markets data

            - + -

            [UPDATE: i remade the maps in green, hope that helps...]

            A recent survey reported in Science ("Defeating Creationism in the Courtroom, but not in the Classroom") found that biology teachers in high school do not often accept the basis of their discipline, as do teachers in other disciplines, and thus may not teach evolution appropriately. Read more here: New York Times.

            I took a little time to play with the data provided online along with the Science article. The data is available on the Science website along with the article, and the dataset I read into R is unchanged from the original. The states abbreviations file is here (as a .xls). Here goes:

            I only played with two survey questions: q1b (no. of hours ecology is taught per year), and q1d (no. of hours evolution is taught per year). I looked at ecology and evolution as this blog is about ecology and evolution. It seems that some states that teach a lot of ecology teach a lot of evolution, but I found no correlation between the two without extreme outliers. I couldn’t help but notice my home state, TX, is near the bottom of the list on both counts - go TX! The teaching of evolution on the map produced below is less predictable than I would have thought, just based on my assumptions about political will in each state.


            # Analyses of Conditionality Data set of all variables, except for latitude, etc.
            setwd("/Mac/Rstuff/Blogetc/EvolutionTeaching/") # Set working directory
            library(ggplot2)
             
            # read in data, and prepare new columns
            survey <- read.csv("berkmandata.csv")
            str(survey) # (I do realize that survey is a data object in the MASS package)
             
            # Assign actual hours to survey answers
            ecol <- gsub(1, 0, survey$q1b)
            ecol <- gsub(2, 1.5, ecol)
            ecol <- gsub(3, 4, ecol)
            ecol <- gsub(4, 8, ecol)
            ecol <- gsub(5, 13, ecol)
            ecol <- gsub(6, 18, ecol)
            ecol <- gsub(7, 20, ecol)
             
            evol <- gsub(1, 0, survey$q1d)
            evol <- gsub(2, 1.5, evol)
            evol <- gsub(3, 4, evol)
            evol <- gsub(4, 8, evol)
            evol <- gsub(5, 13, evol)
            evol <- gsub(6, 18, evol)
            evol <- gsub(7, 20, evol)
             
            survey$ecol <- as.numeric(ecol)
            survey$evol <- as.numeric(evol)
             
            # ddply it
            survey_sum <- ddply(survey, .(st_posta), summarise,
            mean_ecol_hrs = mean(ecol, na.rm=T),
            mean_evol_hrs = mean(evol, na.rm=T),
            se_ecol_hrs = sd(ecol, na.rm=T)/sqrt(length(ecol)),
            se_evol_hrs = sd(evol, na.rm=T)/sqrt(length(evol)),
            num_teachers = length(st_posta)
            )
             
            # plotting
            limits_ecol <- aes(ymax = mean_ecol_hrs + se_ecol_hrs, ymin = mean_ecol_hrs - se_ecol_hrs)
            limits_evol <- aes(ymax = mean_evol_hrs + se_evol_hrs, ymin = mean_evol_hrs - se_evol_hrs)
             
            ggplot(survey_sum, aes(x = reorder(st_posta, mean_ecol_hrs), y = mean_ecol_hrs)) +
            geom_point() +
            geom_errorbar(limits_ecol) +
            geom_text(aes(label = num_teachers), vjust = 1, hjust = -3, size = 3) +
            coord_flip() +
            labs(x = "State", y = "Mean hours of ecology taught \n per year (+/- 1 se)")
            ####SMALL NUMBERS BY BARS ARE NUMBER OF TEACHERS THAT RESPONDED TO THE SURVEY


             
            ggplot(survey_sum, aes(x = reorder(st_posta, mean_evol_hrs), y = mean_evol_hrs)) +
            geom_point() +
            geom_errorbar(limits_evol) +
            geom_text(aes(label = num_teachers), vjust = 1, hjust = -3, size = 3) +
            coord_flip() +
            labs(x = "State", y = "Mean hours of evolution taught \n per year (+/- 1 se)")
             ####SMALL NUMBERS BY BARS ARE NUMBER OF TEACHERS THAT RESPONDED TO THE SURVEY


             
            # map
            tryrequire("maps")
            states <- map
            data("state")
            statenames <- read.csv("/Mac/Rstuff/Code/statesabbreviations.csv")
            surveysum <- merge(surveysum, statenames, by.x = "stposta", by.y = "stateabbrev")
            survey
            summap <- merge(states, surveysum, by.x = "region", by.y = "state")
            survey
            summap <- surveysummap[order(surveysummap$order), ]
             
            qplot(long, lat, data = surveysummap, group = group, fill = meanecolhrs, geom = "polygon") + scalefillgradient(low="black", high="green")
            <a href="http://3.bp.blogspot.com/-cNO2YWHX0Hk/TVQP5B7VxmI/AAAAAAAAEZ8/GBYKNR5vUBs/s1600/surveyecolmapgreen.jpeg" imageanchor="1" style="clear: left; margin-bottom: 1em; margin-right: 1em;">


             

            qplot(long, lat, data = survey_sum_map, group = group, fill = mean_evol_hrs, geom = "polygon") + scale_fill_gradient(low="black", high="green")


            Created by Pretty R at inside-R.org

            + I combined USDA data on farmer's markets in the US with population census data to get an idea of the disparity in farmers markets by state, and then also expressed per capita.

            Download USDA data here. The formatted file I used below is here (in excel format, although I read into R as csv file). The census data is read from url as below.

            California has by far the most farmer's markets in absolute numbers, but Vermont takes the cake in markets per capita. Iowa comes in a distant second behind Vermont in markets per capita.



            The code:
            ######## Farmer's Markets #############
            setwd("/Mac/R_stuff/Blog_etc/USDAFarmersMarkets") # Set to your working directory, this is where you want to call files from and write files to
            install.packages(c("ggplot2", "RCurl")) # install all packags required below
            require(ggplot2) # plyr is libraried along with ggplot2, as ggplot2 uses plyr (as well as package reshape) functions
             
            # read market data
            markets <- read.csv("farmmarkets.csv")
            markets$state <- as.factor(gsub("Wyoming ", "Wyoming", markets$LocAddState)) # there was a typo for Wyoming
            markets <- na.omit(markets)
            str(markets)
             
            # read population census data
            popcen <- read.csv("http://www.census.gov/popest/national/files/NST_EST2009_ALLDATA.csv")
            popcen <- popcen[,c(4,5,6,17)]
            str(popcen)
             
            # summarize
            markets_ <- ddply(markets, .(state), summarise,
            markets_n = length(LocAddState)
            )
             
            markets_pop_ <- merge(markets_, popcen[,-1], by.x = "state", by.y = "NAME") # merge two data sets
            markets_pop_$marketspercap <- markets_pop_$markets_n/markets_pop_$POPESTIMATE2009 # create column of markets per capita
            markets_pop_$markets_n_st <- markets_pop_$markets_n/max(markets_pop_$markets_n)
            markets_pop_$marketspercap_st <- markets_pop_$marketspercap/max(markets_pop_$marketspercap)
             
            # plot
            ggplot(melt(markets_pop_[,-c(2:5)]), aes(x = state, y = value, fill = variable)) +
            geom_bar(position = "dodge") +
            coord_flip()
            ggsave("fmarkets_barplot.jpeg")
            Created by Pretty R at inside-R.org

            Note: the x-axis here is standardized value of number of markets (markets_n_st) and number of markets per capita (marketspercap_st).



            # maps
            try_require("maps")
            states <- map_data("state")
            markets_pop_$statelow <- tolower(markets_pop_$state)
            survey_sum_map <- merge(states, markets_pop_, by.x = "region", by.y = "statelow")
            survey_sum_map <- survey_sum_map[order(survey_sum_map$order), ]
            str(survey_sum_map)
             
            qplot(long, lat, data = survey_sum_map, group = group, fill = markets_n, geom = "polygon", main = "Total farmer's markets") +
            scale_fill_gradient(low="green", high="black")
            ggsave("fmarkets_map_green.jpeg")







            qplot(long, lat, data = survey_sum_map, group = group, fill = marketspercap, geom = "polygon", main = "Farmer's markets per person") +
            scale_fill_gradient(low="green", high="black")
             
            ggsave("fmarkerspercap_map_green.jpeg")






            - - Plants are less sex deprived when next to closely related neighbors + + Troubling news for the teaching of evolution

            - - -

            A new early online paper in American Journal of Botany by Risa Sargent and colleagues suggests that plants are less sex deprived (pollen limited) in vernal pools that have more closely related plant species. Vernal pools are (at least in my experience) small (to quite large) depressions that fill up with water with winter rains, and dry out completely in the summer. Vernal pool adapted plants flower in rings down the pool as the water dries up. Aquatic invertebrates and some herps can last through the summer by burrowing in the soil.

            - -

            The study did hand pollination experiments with a focal species, Lasthenia fremontii. They examined the relationship between these pollen limitation experiments and the relatedness of L. fremontii to the rest of the plant community in each pool.

            - -

            Plant species richness was not related to pollen limitation. Thus, at least in their study with vernal pools in California, relatedness to your plant neighbors has a greater impact than plant richness.

            - -

            The great thing about vernal pools is that they are truly terrestrial islands of habitat, surrounded by inhospitable habitat for pool species. Many vernal pools are created artificially for habitat conservation easements (e.g., here). Perhaps someone can experimentally manipulate phylogenetic diversity in artificially created pools to really get at the causal links.

            + -

            p.s. Ok, this post is not terribly R related, except for that this paper used R for some of their statistics.

            + [UPDATE: i remade the maps in green, hope that helps...]

            A recent survey reported in Science ("Defeating Creationism in the Courtroom, but not in the Classroom") found that biology teachers in high school do not often accept the basis of their discipline, as do teachers in other disciplines, and thus may not teach evolution appropriately. Read more here: New York Times.

            I took a little time to play with the data provided online along with the Science article. The data is available on the Science website along with the article, and the dataset I read into R is unchanged from the original. The states abbreviations file is here (as a .xls). Here goes:

            I only played with two survey questions: q1b (no. of hours ecology is taught per year), and q1d (no. of hours evolution is taught per year). I looked at ecology and evolution as this blog is about ecology and evolution. It seems that some states that teach a lot of ecology teach a lot of evolution, but I found no correlation between the two without extreme outliers. I couldn’t help but notice my home state, TX, is near the bottom of the list on both counts - go TX! The teaching of evolution on the map produced below is less predictable than I would have thought, just based on my assumptions about political will in each state.


            # Analyses of Conditionality Data set of all variables, except for latitude, etc.
            setwd("/Mac/R_stuff/Blog_etc/EvolutionTeaching/") # Set working directory
            library(ggplot2)
             
            # read in data, and prepare new columns
            survey <- read.csv("berkmandata.csv")
            str(survey) # (I do realize that survey is a data object in the MASS package)
             
            # Assign actual hours to survey answers
            ecol <- gsub(1, 0, survey$q1b)
            ecol <- gsub(2, 1.5, ecol)
            ecol <- gsub(3, 4, ecol)
            ecol <- gsub(4, 8, ecol)
            ecol <- gsub(5, 13, ecol)
            ecol <- gsub(6, 18, ecol)
            ecol <- gsub(7, 20, ecol)
             
            evol <- gsub(1, 0, survey$q1d)
            evol <- gsub(2, 1.5, evol)
            evol <- gsub(3, 4, evol)
            evol <- gsub(4, 8, evol)
            evol <- gsub(5, 13, evol)
            evol <- gsub(6, 18, evol)
            evol <- gsub(7, 20, evol)
             
            survey$ecol <- as.numeric(ecol)
            survey$evol <- as.numeric(evol)
             
            # ddply it
            survey_sum <- ddply(survey, .(st_posta), summarise,
            mean_ecol_hrs = mean(ecol, na.rm=T),
            mean_evol_hrs = mean(evol, na.rm=T),
            se_ecol_hrs = sd(ecol, na.rm=T)/sqrt(length(ecol)),
            se_evol_hrs = sd(evol, na.rm=T)/sqrt(length(evol)),
            num_teachers = length(st_posta)
            )
             
            # plotting
            limits_ecol <- aes(ymax = mean_ecol_hrs + se_ecol_hrs, ymin = mean_ecol_hrs - se_ecol_hrs)
            limits_evol <- aes(ymax = mean_evol_hrs + se_evol_hrs, ymin = mean_evol_hrs - se_evol_hrs)
             
            ggplot(survey_sum, aes(x = reorder(st_posta, mean_ecol_hrs), y = mean_ecol_hrs)) +
            geom_point() +
            geom_errorbar(limits_ecol) +
            geom_text(aes(label = num_teachers), vjust = 1, hjust = -3, size = 3) +
            coord_flip() +
            labs(x = "State", y = "Mean hours of ecology taught \n per year (+/- 1 se)")
            ####SMALL NUMBERS BY BARS ARE NUMBER OF TEACHERS THAT RESPONDED TO THE SURVEY


             
            ggplot(survey_sum, aes(x = reorder(st_posta, mean_evol_hrs), y = mean_evol_hrs)) +
            geom_point() +
            geom_errorbar(limits_evol) +
            geom_text(aes(label = num_teachers), vjust = 1, hjust = -3, size = 3) +
            coord_flip() +
            labs(x = "State", y = "Mean hours of evolution taught \n per year (+/- 1 se)")
             ####SMALL NUMBERS BY BARS ARE NUMBER OF TEACHERS THAT RESPONDED TO THE SURVEY


             
            # map
            try_require("maps")
            states <- map_data("state")
            statenames <- read.csv("/Mac/R_stuff/Code/states_abbreviations.csv")
            survey_sum_ <- merge(survey_sum, statenames, by.x = "st_posta", by.y = "state_abbrev")
            survey_sum_map <- merge(states, survey_sum_, by.x = "region", by.y = "state")
            survey_sum_map <- survey_sum_map[order(survey_sum_map$order), ]
             
            qplot(long, lat, data = survey_sum_map, group = group, fill = mean_ecol_hrs, geom = "polygon") + scale_fill_gradient(low="black", high="green")



             

            qplot(long, lat, data = survey_sum_map, group = group, fill = mean_evol_hrs, geom = "polygon") + scale_fill_gradient(low="black", high="green")


            Created by Pretty R at inside-R.org
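            A side note on the block of gsub() calls above: the 1-7 survey codes can also be mapped to hours in one step with a named lookup vector. A minimal sketch of that alternative (not the post's original approach):

```r
hours <- c("1" = 0, "2" = 1.5, "3" = 4, "4" = 8, "5" = 13, "6" = 18, "7" = 20)
survey$ecol <- unname(hours[as.character(survey$q1b)])
survey$evol <- unname(hours[as.character(survey$q1d)])
```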
            diff --git a/_site/page58/index.html b/_site/page58/index.html index 6c38706d6e..8bdb5a735e 100644 --- a/_site/page58/index.html +++ b/_site/page58/index.html @@ -61,48 +61,64 @@

            Recology

            - - Good riddance to Excel pivot tables + + Plants are less sex deprived when next to closely related neighbors

            - + -

            Excel pivot tables have been how I have reorganized data...up until now. These are just a couple of examples why R is superior to Excel for reorganizing data:

            + A new early online paper in [American Journal of Botany][ajb] by [Risa Sargent][] and colleagues suggests that plants are less sex deprived (pollen limited) in vernal pools that have more closely related plant species. Vernal pools are (at least in my experience) small (to quite large) depressions that fill up with water with winter rains, and dry out completely in the summer. Vernal pool adapted plants flower in rings down the pool as the water dries up. Aquatic invertebrates and some herps can last through the summer by burrowing in the soil. -

            UPDATE: I fixed the code to use 'dcast' instead of 'cast'. And library(ggplot2) instead of library(plyr) [plyr is called along with ggplot2]. Thanks Bob!

            +The study did hand pollination experiments with a focal species, _Lasthenia fremontii_. They examined the relationship between these pollen limitation experiments and the relatedness of _L. fremontii_ to the rest of the plant community in each pool. -

            Also, see another post on this topic here.

            +Plant species richness was not related to pollen limitation. Thus, at least in their study with vernal pools in California, relatedness to your plant neighbors has a greater impact than plant richness. - +The great thing about vernal pools is that they are truly terrestrial islands of habitat, surrounded by inhospitable habitat for pool species. Many vernal pools are created artificially for habitat conservation easements (e.g., [here][]. Perhaps someone can experimentally manipulate phylogenetic diversity in artificially created pools to really get at the causal links. -

            Figure

            +p.s. Ok, this post is not terribly R related, except for that this paper used R for some of their statistics. + +[here]:http://www.vollmarconsulting.com/projects/caltrans-madera-pools/index.html +[ajb]: http://www.amjbot.org/cgi/content/abstract/ajb.1000329v1 +[Risa Sargent]: http://mysite.science.uottawa.ca/rsargent/

            - - R and Google Visualization API: Fish harvests + + Good riddance to Excel pivot tables

            - + + + Excel pivot tables have been how I have reorganized data...up until now. These are just a couple of examples why R is superior to Excel for reorganizing data: -

            I recently gathered fish harvest data from the U.S. National Oceanic and Atmospheric Administration (NOAA), which I downloaded from Infochimps. The data is fish harvest by weight and value, by species, for 21 years, from 1985 to 2005.

            Here is a link to a Google document of the data I used below. I had to do some minor processing in Excel first; thus the link to this data.
            https://spreadsheets.google.com/ccc?key=0Aq6aW8n11tSdFRySXQzYkppLXFaU2F5aC04d19ZS0E&hl=en

            Get the original data from Infochimps here:
            http://infochimps.com/datasets/domestic-fish-and-shellfish-catch-value-and-price-by-species-198





            ################# Fish harvest data ########################################
            setwd("/Mac/R
            stuff/Blogetc/Infochimps/Fishharvest") # Set path
            library(ggplot2)
            library(googleVis)
            library(Hmisc)
             
            fish <- read.csv("fishharvest.csv") # read data
            fish2 <- melt(fish,id=1:3,measure=4:24) # melt table
            year <- rep(1985:2005, each = 117)
            fish2 <- data.frame(fish2,year) # replace year with actual values
             
            # Google visualization API
            fishdata <- data.frame(subset(fish2,fish2$var == "quantity_1000lbs",-4),value_1000dollars=subset(fish2,fish2$var == "value_1000dollars",-4)[,4])
            names(fishdata)[4] <- "quantity_1000lbs"
            fishharvest <- gvisMotionChart(fishdata, idvar="species", timevar="year")
            plot(fishharvest)
            Created by Pretty R at inside-R.org




            <div id="MotionChart
            2011-01-17-08-09-24" style="height: 500px; width: 600px;">
            Data: fishdata, Chart ID: MotionChart2011-01-17-08-09-24


            R version 2.12.1 (2010-12-16),

            Google Terms of Use



            fishdatagg2 <- ddply(fish2,.(species,var),summarise,
            mean = mean(value),
            se = sd(value)/sqrt(length(value))
            )
            fishdatagg2 <- subset(fishdatagg2,fishdatagg2$var %in% c("quantity_1000lbs","value_1000dollars"))
            limit3 <- aes(ymax = mean + se, ymin = mean - se)
            bysppfgrid <- ggplot(fishdatagg2,aes(x=reorder(species,rank(mean)),y=mean,colour=species)) + geom_point() + geom_errorbar(limit3) + facet_grid(. ~ var, scales="free") + opts(legend.position="none") + coord_flip() + scale_y_continuous(trans="log")
            ggsave("bysppfgrid.jpeg")
            Created by Pretty R at inside-R.org


            <a href="http://2.bp.blogspot.com/fANWq796z-w/TTRvw6n41xI/AAAAAAAAEYk/aaoDVQC8kk/s1600/bysppfgrid.jpeg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"><img border="0" height="640" src="http://2.bp.blogspot.com/fANWq796z-w/TTRvw6n41xI/AAAAAAAAEYk/aaoDVQ_C8kk/s640/bysppfgrid.jpeg" width="500" />

            +UPDATE: I fixed the code to use 'dcast' instead of 'cast'. And library(ggplot2) instead of library(plyr) [plyr is called along with ggplot2]. Thanks Bob! + +Also, see another post on this topic [here][]. + + + + + +![Figure](/pivottable1.png) + +[here]: http://news.mrdwab.com/2010/08/08/using-the-reshape-packagein-r/
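            For readers landing here without the embedded code: a minimal sketch of the pivot-table-style reshaping the post above describes, using dcast() as in the update (hypothetical data; dcast() lives in the reshape2 package):

```r
library(reshape2)
long <- data.frame(plot = rep(1:3, each = 2),
                   species = rep(c("sp1", "sp2"), 3),
                   abundance = c(5, 2, 8, 1, 3, 9))
dcast(long, plot ~ species, value.var = "abundance")  # wide, pivot-table-like layout
```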

            - - R and Google Visualization API: Wikispeedia + + R and Google Visualization API: Fish harvests

            - + -

            Wikispeedia is a website trying to gather all speed limit signs on Earth. I recently created a Google Visualization for some of their data, specifically on speed limit signs that change speed throughout the day. Check it out here. Here is how to see and comment on what they are doing: website, and Google groups.

            + I recently gathered fish harvest data from the U.S. National Oceanic and Atmospheric Administration (NOAA), which I downloaded from Infochimps. The data is fish harvest by weight and value, by species, for 21 years, from 1985 to 2005.

            Here is a link to a Google document of the data I used below. I had to do some minor processing in Excel first; thus the link to this data.
            https://spreadsheets.google.com/ccc?key=0Aq6aW8n11tS_dFRySXQzYkppLXFaU2F5aC04d19ZS0E&hl=en

            Get the original data from Infochimps here:
            http://infochimps.com/datasets/domestic-fish-and-shellfish-catch-value-and-price-by-species-198




            ################# Fish harvest data ########################################
            setwd("/Mac/R_stuff/Blog_etc/Infochimps/Fishharvest") # Set path
            library(ggplot2)
            library(googleVis)
            library(Hmisc)
             
            fish <- read.csv("fishharvest.csv") # read data
            fish2 <- melt(fish,id=1:3,measure=4:24) # melt table
            year <- rep(1985:2005, each = 117)
            fish2 <- data.frame(fish2,year) # replace year with actual values
             
            # Google visualization API
            fishdata <- data.frame(subset(fish2,fish2$var == "quantity_1000lbs",-4),value_1000dollars=subset(fish2,fish2$var == "value_1000dollars",-4)[,4])
            names(fishdata)[4] <- "quantity_1000lbs"
            fishharvest <- gvisMotionChart(fishdata, idvar="species", timevar="year")
            plot(fishharvest)
            Created by Pretty R at inside-R.org




            Data: fishdata, Chart ID: MotionChart_2011-01-17-08-09-24


            R version 2.12.1 (2010-12-16),

            Google Terms of Use



            fishdatagg2 <- ddply(fish2,.(species,var),summarise,
            mean = mean(value),
            se = sd(value)/sqrt(length(value))
            )
            fishdatagg2 <- subset(fishdatagg2,fishdatagg2$var %in% c("quantity_1000lbs","value_1000dollars"))
            limit3 <- aes(ymax = mean + se, ymin = mean - se)
            bysppfgrid <- ggplot(fishdatagg2,aes(x=reorder(species,rank(mean)),y=mean,colour=species)) + geom_point() + geom_errorbar(limit3) + facet_grid(. ~ var, scales="free") + opts(legend.position="none") + coord_flip() + scale_y_continuous(trans="log")
            ggsave("bysppfgrid.jpeg")
            Created by Pretty R at inside-R.org
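            An aside for anyone re-running this code today: opts() was removed from later ggplot2 releases, so the legend option above would now be set with theme(). A hedged sketch of the modern equivalent of that plotting call, assuming the fishdatagg2 and limit3 objects from above:

```r
bysppfgrid <- ggplot(fishdatagg2, aes(x = reorder(species, rank(mean)), y = mean, colour = species)) +
  geom_point() + geom_errorbar(limit3) +
  facet_grid(. ~ var, scales = "free") +
  theme(legend.position = "none") +      # replaces opts(legend.position = "none")
  coord_flip() + scale_y_continuous(trans = "log")
```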


            diff --git a/_site/page59/index.html b/_site/page59/index.html index 0342b1f8f8..8f5edf4681 100644 --- a/_site/page59/index.html +++ b/_site/page59/index.html @@ -61,40 +61,48 @@

            Recology

            - - Bipartite networks and R + + R and Google Visualization API: Wikispeedia

            - + + + Wikispeedia is a website trying to gather all speed limit signs on Earth. I recently created a Google Visualization for some of their data, specifically on speed limit signs that change speed throughout the day. Check it out [here][]. Here is how to see and comment on what they are doing: [website][], and [Google groups][groups]. -

            Earlier, I posted about generating networks from abundance distributions that you specify. If this post was interesting, check out Jeff Kilpatrick's website, where he provides code he produced in R and Octave to compare real bipartite networks to ones generated based on ecological variables measured in the field (in our case it was abundance, body size, and nectar production). We used that code for a paper we published. Code was modified from code produced by Diego P. Vazquez.

            +[here]: http://groups.google.com/group/wikispeedia/browse_thread/thread/c9c712125a597b16 +[website]: http://www.wikispeedia.org/ +[groups]: http://groups.google.com/group/wikispeedia?lnk=

            - - Just for fun: Recovery.gov data snooping + + Bipartite networks and R

            - + + + Earlier, I posted about [generating networks from abundance distributions that you specify][link]. If this post was interesting, check out Jeff Kilpatrick's website, where he provides code he produced in R and Octave to compare real bipartite networks to ones generated based on ecological variables measured in the field (in our case it was abundance, body size, and nectar production). We used that code for [a paper we published][paper]. Code was modified from code produced by [Diego P. Vazquez][diego]. -





            Okay, so this isn't ecology related at all, but I like exploring data sets. So here goes...

            Propublica has some awesome data sets available at their website: http://www.propublica.org/tools/
            I played around with their data set on Recovery.gov (see hyperlink below in code). Here's some figures:

            Mean award amount, ranked by mean amount, and also categorized by number of grants received ("nfund") by state (by size and color of point). Yes, there are 56 "states", which includes things like the Northern Mariana Islands (MP). Notice that California got the largest number of awards, but the mean award size was relatively small.
            Here is a figure by government organization that awarded each award, by mean award size (y-axis), number of awards (x-axis), and number of jobs created (numjobs=text size). Notice that the FCC (Federal Communications Commission) created nearly the most jobs despite not giving very large awards (although they did give a lot of awards).



            Here is a figure of mean awards by state on a map of the US:


            And by number of awards by state:





            Here is the code:

            ################################################################################
            #################### Propublica Recovery.gov data ####################
            ################################################################################
            install.packages(c("ggplot2","maps","stringr"))
            library(ggplot2)
            library(maps)
            library(stringr)
            setwd("/Mac/Rstuff/Blogetc") # Set working directory
            themeset(themebw())
             
            # Read propublica data from file (download from here: http://propublica.s3.amazonaws.com/assets/recoverygov/propublica-recoverygov-primary-2.xls
            propubdat <- read.csv("propublica-recoverygov-primary-2.csv")
            str(propubdat)
             
            # Summarize data
            fundbystate <- ddply(propubdat,.(prime_state),summarise,
            meanfund = mean(award_amount),
            sefund = sd(award_amount)/sqrt(length(award_amount)),
            nfund = length(award_amount),
            numjobs = mean(number_of_jobs)
            )
             
            fundbyagency <- ddply(propubdat,.(funding_agency_name),summarise,
            meanfund = mean(award_amount),
            sefund = sd(award_amount)/sqrt(length(award_amount)),
            nfund = length(award_amount),
            numjobs = mean(number_of_jobs)
            )
             
             
            fun1 <- function(a) {str_c(paste(na.omit(str_extract(unlist(str_split(unlist(as.character(a[1])), " ")), "[A-Z]{1}"))), collapse="")} # Fxn to make funding agency name abbreviations within ddply below
             
            fundbyagency2 <- ddply(fundbyagency,.(funding_agency_name),transform, # add to table funding agency name abbreviations
            agency_abbrev = fun1(funding_agency_name)
            )
             
            # Plot data, means and se's by state
            limits <- aes(ymax = meanfund + sefund, ymin = meanfund - sefund)
            dodge <- position_dodge(width=0.6)
            awardbystate <- ggplot(fundbystate,aes(x=reorder(prime_state,meanfund),y=meanfund,colour=nfund)) + geom_point(aes(size=nfund),position=dodge) + coord_flip() + geom_errorbar(limits, width=0.2,position=dodge) + opts(panel.grid.major = theme_blank(),panel.grid.minor=theme_blank(),legend.position=c(0.7,0.2)) + labs(x="State",y="Mean grant amount awarded +/- 1 s.e.")
            ggsave("awardbystate.jpeg")
             
            # Plot data, means and se's by funding agency
            limits2 <- aes(ymax = meanfund + sefund, ymin = meanfund - sefund)
            dodge <- position_dodge(width=0.6)
            awardbyagency <- ggplot(fundbyagency2,aes(y=log(meanfund),x=log(nfund),label=agency_abbrev)) + geom_text(aes(size=numjobs))
            ggsave("awardbyagency.jpeg")
             
             
            # On US map
            fundbystate2 <- read.csv("fundbystate.csv")
             
            states <- map_data("state") # get state geographic data from the maps package
            recovmap <- merge(states,fundbystate2,by="region") # merge datasets
             
            qplot(long,lat,data=recovmap,group=group,fill=meanfund,geom="polygon")
            ggsave("bystatemapmeans.jpeg")
             
            qplot(long,lat,data=recovmap,group=group,fill=nfund,geom="polygon")
            ggsave("bystatemapnumber.jpeg")

            Created by Pretty R at inside-R.org

            And the text file fundbystate2 is here. I had to make this file separately so I could get the spelled-out state names in, as they were not provided in the Propublica dataset.

            Source and disclaimer:
            Data provided by Propublica. Data may contain errors and/or omissions.

            +[link]: http://r-ecology.blogspot.com/2011/01/ecological-networks-from-abundance.html +[paper]: http://www.springerlink.com/content/1055615l6m74mp30/ +[diego]: http://www.cricyt.edu.ar/interactio/dvazquez/html/index_e.html

            - - R and Google Visualization API + + Just for fun: Recovery.gov data snooping

            - + -

            R interfaces with the powerful Google Visualization API via the googleVis package (see here). It's relatively easy to convert your R graphics into interactive graphics to post in a web browser. And the graphics are quite nice, as seen below in a simple graph of some of my data collected this summer on seed predation of Helianthus annuus seeds in Texas:
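            The interactive chart itself was embedded in the original page rather than shown as code. Here is a minimal sketch of the kind of googleVis call behind such a chart (hypothetical data, since the post's data2 object isn't included here):

```r
library(googleVis)
dat <- data.frame(site = rep(c("A", "B"), each = 3),
                  year = rep(2008:2010, 2),
                  seeds_damaged = c(10, 14, 9, 22, 18, 25))
m <- gvisMotionChart(dat, idvar = "site", timevar = "year")
plot(m)  # opens the interactive chart in a browser
```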



            <div id="MotionChart2011-01-08-19-31-57" style="height: 500px; width: 600px;">

            Data: data2, Chart ID: MotionChart_2011-01-08-19-31-57


            R version 2.12.1 (2010-12-16),

            Google Terms of Use






            <%@include file="../src/simpleFooter.rsp"%>

            +



            Okay, so this isn't ecology related at all, but I like exploring data sets. So here goes...

            Propublica has some awesome data sets available at their website: http://www.propublica.org/tools/
            I played around with their data set on Recovery.gov (see hyperlink below in code). Here's some figures:

            Mean award amount, ranked by mean amount, and also categorized by number of grants received ("nfund") by state (by size and color of point). Yes, there are 56 "states", which includes things like the Northern Mariana Islands (MP). Notice that California got the largest number of awards, but the mean award size was relatively small.
            Here is a figure by government organization that awarded each award, by mean award size (y-axis), number of awards (x-axis), and number of jobs created (numjobs=text size). Notice that the FCC (Federal Communications Commission) created nearly the most jobs despite not giving very large awards (although they did give a lot of awards).



            Here is a figure of mean awards by state on a map of the US:


            And by number of awards by state:





            Here is the code:

            ################################################################################
            #################### Propublica Recovery.gov data ####################
            ################################################################################
            install.packages(c("ggplot2","maps","stringr"))
            library(ggplot2)
            library(maps)
            library(stringr)
            setwd("/Mac/R_stuff/Blog_etc") # Set working directory
            theme_set(theme_bw())
             
            # Read propublica data from file (download from here: http://propublica.s3.amazonaws.com/assets/recoverygov/propublica-recoverygov-primary-2.xls
            propubdat <- read.csv("propublica-recoverygov-primary-2.csv")
            str(propubdat)
             
            # Summarize data
            fundbystate <- ddply(propubdat,.(prime_state),summarise,
            meanfund = mean(award_amount),
            sefund = sd(award_amount)/sqrt(length(award_amount)),
            nfund = length(award_amount),
            numjobs = mean(number_of_jobs)
            )
             
            fundbyagency <- ddply(propubdat,.(funding_agency_name),summarise,
            meanfund = mean(award_amount),
            sefund = sd(award_amount)/sqrt(length(award_amount)),
            nfund = length(award_amount),
            numjobs = mean(number_of_jobs)
            )
             
             
            fun1 <- function(a) {str_c(paste(na.omit(str_extract(unlist(str_split(unlist(as.character(a[1])), " ")), "[A-Z]{1}"))), collapse="")} # Fxn to make funding agency name abbreviations within ddply below
             
            fundbyagency2 <- ddply(fundbyagency,.(funding_agency_name),transform, # add to table funding agency name abbreviations
            agency_abbrev = fun1(funding_agency_name)
            )
             
            # Plot data, means and se's by state
            limits <- aes(ymax = meanfund + sefund, ymin = meanfund - sefund)
            dodge <- position_dodge(width=0.6)
            awardbystate <- ggplot(fundbystate,aes(x=reorder(prime_state,meanfund),y=meanfund,colour=nfund)) + geom_point(aes(size=nfund),position=dodge) + coord_flip() + geom_errorbar(limits, width=0.2,position=dodge) + opts(panel.grid.major = theme_blank(),panel.grid.minor=theme_blank(),legend.position=c(0.7,0.2)) + labs(x="State",y="Mean grant amount awarded +/- 1 s.e.")
            ggsave("awardbystate.jpeg")
             
            # Plot data, means and se's by funding agency
            limits2 <- aes(ymax = meanfund + sefund, ymin = meanfund - sefund)
            dodge <- position_dodge(width=0.6)
            awardbyagency <- ggplot(fundbyagency2,aes(y=log(meanfund),x=log(nfund),label=agency_abbrev)) + geom_text(aes(size=numjobs))
            ggsave("awardbyagency.jpeg")
             
             
            # On US map
            fundbystate2 <- read.csv("fundbystate.csv")
             
            states <- map_data("state") # get state geographic data from the maps package
            recovmap <- merge(states,fundbystate2,by="region") # merge datasets
             
            qplot(long,lat,data=recovmap,group=group,fill=meanfund,geom="polygon")
            ggsave("bystatemapmeans.jpeg")
             
            qplot(long,lat,data=recovmap,group=group,fill=nfund,geom="polygon")
            ggsave("bystatemapnumber.jpeg")

            Created by Pretty R at inside-R.org
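            To make the abbreviation step concrete: fun1() just pulls the first capital letter from each word of the agency name and pastes them together (assuming stringr is loaded and fun1 is defined as above):

```r
fun1("Federal Communications Commission")  # returns "FCC"
```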

            And the text file fundbystate2 is here. I had to make this file separately so I could get the spelled-out state names in, as they were not provided in the Propublica dataset.

            Source and disclaimer:
            Data provided by Propublica. Data may contain errors and/or omissions.
            diff --git a/_site/page6/index.html b/_site/page6/index.html index 397909ed59..eb8788a358 100644 --- a/_site/page6/index.html +++ b/_site/page6/index.html @@ -61,362 +61,595 @@

            Recology

            - - PUT dataframes on your couch + + Faster solr with csv

            - + + + With the [help of user input](https://github.com/ropensci/solr/issues/47), I've tweaked `solr` just a bit to make things faster using default setings. I imagine the main interface for people using the `solr` R client is via `solr_search()`, which used to have `wt=json` by default. Changing this to `wt=csv` gives better performance. And it sorta makes sense to use csv, as the point of using an R client is probably do get data eventually into a data.frame, so it makes sense to go csv format (Already in tabular format) if it's faster too. + +## Install + +Install and load `solr` + + +```r +devtools::install_github("ropensci/solr") +``` + + +```r +library("solr") +library("microbenchmark") +``` + +## Setup + +Define base url and fields to return + + +```r +url <- 'http://api.plos.org/search' +fields <- c('id','cross_published_journal_name','cross_published_journal_key', + 'cross_published_journal_eissn','pmid','pmcid','publisher','journal', + 'publication_date','article_type','article_type_facet','author', + 'author_facet','volume','issue','elocation_id','author_display', + 'competing_interest','copyright') +``` + +## json + +The previous default for `solr_search()` used `json` + + +```r +solr_search(q='*:*', rows=10, fl=fields, base=url, wt = "json") +#> Source: local data frame [10 x 19] +#> +#> id +#> 1 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4 +#> 2 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/title +#> 3 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/abstract +#> 4 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/references +#> 5 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/body +#> 6 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525 +#> 7 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/title +#> 8 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/abstract +#> 9 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/references +#> 10 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/body +#> Variables not shown: cross_published_journal_name (chr), +#> cross_published_journal_key (chr), cross_published_journal_eissn (chr), +#> pmid (chr), pmcid (chr), publisher (chr), journal (chr), +#> publication_date (chr), article_type (chr), article_type_facet (chr), +#> author (chr), author_facet (chr), volume (int), issue (int), +#> elocation_id (chr), author_display (chr), competing_interest (chr), +#> copyright (chr) +``` + +## csv + +The default `wt` setting is now `csv` + + +```r +solr_search(q='*:*', rows=10, fl=fields, base=url, wt = "json") +#> Source: local data frame [10 x 19] +#> +#> id +#> 1 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4 +#> 2 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/title +#> 3 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/abstract +#> 4 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/references +#> 5 10.1371/annotation/856f0890-9d85-4719-8e54-c27530ac94f4/body +#> 6 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525 +#> 7 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/title +#> 8 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/abstract +#> 9 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/references +#> 10 10.1371/annotation/8551e3d5-fdd5-413b-a253-170ba13b7525/body +#> Variables not shown: cross_published_journal_name (chr), +#> cross_published_journal_key (chr), cross_published_journal_eissn (chr), +#> pmid (chr), pmcid (chr), publisher (chr), journal (chr), +#> publication_date (chr), article_type 
(chr), article_type_facet (chr), +#> author (chr), author_facet (chr), volume (int), issue (int), +#> elocation_id (chr), author_display (chr), competing_interest (chr), +#> copyright (chr) +``` + +## Compare times + +When parsing to a data.frame (which `solr_search()` does by default), csv is quite a bit faster. + + +```r +microbenchmark( + json = solr_search(q='*:*', rows=500, fl=fields, base=url, wt = "json", verbose = FALSE), + csv = solr_search(q='*:*', rows=500, fl=fields, base=url, wt = "csv", verbose = FALSE), + times = 20 +) +#> Unit: milliseconds +#> expr min lq mean median uq max neval cld +#> json 965.7043 1013.014 1124.1229 1086.3225 1227.9054 1441.8332 20 b +#> csv 509.6573 520.089 541.5784 532.4546 548.0303 723.7575 20 a +``` + +## json vs xml vs csv + +When getting raw data, csv is best, json next, with xml pulling up the rear. + + +```r +microbenchmark( + json = solr_search(q='*:*', rows=1000, fl=fields, base=url, wt = "json", verbose = FALSE, raw = TRUE), + csv = solr_search(q='*:*', rows=1000, fl=fields, base=url, wt = "csv", verbose = FALSE, raw = TRUE), + xml = solr_search(q='*:*', rows=1000, fl=fields, base=url, wt = "xml", verbose = FALSE, raw = TRUE), + times = 10 +) +#> Unit: milliseconds +#> expr min lq mean median uq max neval cld +#> json 1110.9515 1142.478 1198.9981 1169.0808 1195.5709 1518.7412 10 b +#> csv 801.6871 802.516 826.0655 819.1532 835.0512 873.4266 10 a +#> xml 1507.1111 1554.002 1618.5963 1617.5208 1671.0026 1740.4448 10 c +``` + +## Notes + +Note that `wt=csv` is only available in `solr_search()` and `solr_all()` because the csv writer only returns the docs element, dropping the other response elements, including facets, mlt, groups, stats, etc. + +Also, note that the http client used in `solr` is `httr`, which sends a gzip compression header by default, so as long as the server serving up the Solr data has compression turned on, you're all set. + +One more speedup: when you use `wt=json` and parse to a data.frame, the parsing now uses `dplyr`, which makes that step considerably faster. -
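As a quick check that compression is actually happening on the wire, you can hit the same PLOS endpoint with `httr` directly and look at the `Content-Encoding` response header. A minimal sketch (the exact header value depends on how the server is configured):

```r
library("httr")

# httr asks for gzip/deflate by default, so if the server has compression
# enabled the response should come back gzip-encoded
res <- GET("http://api.plos.org/search", query = list(q = "*:*", rows = 1, wt = "json"))
res$headers[["content-encoding"]]
#> e.g. "gzip" when the server serves compressed responses
```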

It would be nice to easily push each row or column of a data.frame into CouchDB instead of having to convert them to JSON yourself, then push them into couch. I recently added the ability to push data.frames into couch using the normal PUT /{db} method, and added support for the couch bulk API.

            - -

            Install

            -
            install.packages("devtools")
            -devtools::install_github("sckott/sofa")
            -
            library("sofa")
            -
            -

            PUT /db

            - -

            You can write directly from a data.frame, either by rows or columns. First, rows:

            -
            #> $ok
            -#> [1] TRUE
            -
            -

            Create a database

            -
            db_create(dbname="mtcarsdb")
            -#> $ok
            -#> [1] TRUE
            -
            out <- doc_create(mtcars, dbname="mtcarsdb", how="rows")
            -out[1:2]
            -#> $`Mazda RX4`
            -#> $`Mazda RX4`$ok
            -#> [1] TRUE
            -#> 
            -#> $`Mazda RX4`$id
            -#> [1] "0063109bfb1c15765854cbc9525c3a7a"
            -#> 
            -#> $`Mazda RX4`$rev
            -#> [1] "1-3946941c894a874697554e3e6d9bc176"
            -#> 
            -#> 
            -#> $`Mazda RX4 Wag`
            -#> $`Mazda RX4 Wag`$ok
            -#> [1] TRUE
            -#> 
            -#> $`Mazda RX4 Wag`$id
            -#> [1] "0063109bfb1c15765854cbc9525c461d"
            -#> 
            -#> $`Mazda RX4 Wag`$rev
            -#> [1] "1-273ff17a938cb956cba21051ab428b95"
            -
            -

            Then by columns

            -
            out <- doc_create(mtcars, dbname="mtcarsdb", how="columns")
            -out[1:2]
            -#> $mpg
            -#> $mpg$ok
            -#> [1] TRUE
            -#> 
            -#> $mpg$id
            -#> [1] "0063109bfb1c15765854cbc9525d4f1f"
            -#> 
            -#> $mpg$rev
            -#> [1] "1-4b83d0ef53a28849a872d47ad03fef9a"
            -#> 
            -#> 
            -#> $cyl
            -#> $cyl$ok
            -#> [1] TRUE
            -#> 
            -#> $cyl$id
            -#> [1] "0063109bfb1c15765854cbc9525d57d3"
            -#> 
            -#> $cyl$rev
            -#> [1] "1-c21bfa5425c67743f0826fd4b44b0dbf"
            -
            -

            Bulk API

            - -

The bulk API should be faster for larger data.frames

            -
            #> $ok
            -#> [1] TRUE
            -
            -

            We'll use part of the diamonds dataset

            -
            library("ggplot2")
            -dat <- diamonds[1:20000,]
            -
            -

            Create a database

            -
            db_create(dbname="bulktest")
            -#> $ok
            -#> [1] TRUE
            -
            -

Load by row (could instead do each column, see the how parameter), printing the time it takes

            -
            system.time(out <- bulk_create(dat, dbname="bulktest"))
            -#>    user  system elapsed 
            -#>  16.832   6.039  24.432
            -
            -

            The returned data is the same as with doc_create()

            -
            out[1:2]
            -#> [[1]]
            -#> [[1]]$ok
            -#> [1] TRUE
            -#> 
            -#> [[1]]$id
            -#> [1] "0063109bfb1c15765854cbc9525d8b8d"
            -#> 
            -#> [[1]]$rev
            -#> [1] "1-f407fe4935af7fd17c101f13d3c81679"
            -#> 
            -#> 
            -#> [[2]]
            -#> [[2]]$ok
            -#> [1] TRUE
            -#> 
            -#> [[2]]$id
            -#> [1] "0063109bfb1c15765854cbc9525d998b"
            -#> 
            -#> [[2]]$rev
            -#> [1] "1-cf8b9a9dcdc026052a663d6fef8a36fe"
            -
            -

            So that's 20,000 rows in not that much time, not bad.

            - -

            not dataframes

            - -

            You can also pass in lists or vectors of json as character strings, e.g.,

            - -

            lists

            -
            #> $ok
            -#> [1] TRUE
            -
            row.names(mtcars) <- NULL # get rid of row.names
            -lst <- parse_df(mtcars, tojson=FALSE)
            -db_create(dbname="bulkfromlist")
            -#> $ok
            -#> [1] TRUE
            -out <- bulk_create(lst, dbname="bulkfromlist")
            -out[1:2]
            -#> [[1]]
            -#> [[1]]$ok
            -#> [1] TRUE
            -#> 
            -#> [[1]]$id
            -#> [1] "ba70c46d73707662b1e204a90fcd9bb8"
            -#> 
            -#> [[1]]$rev
            -#> [1] "1-3946941c894a874697554e3e6d9bc176"
            -#> 
            -#> 
            -#> [[2]]
            -#> [[2]]$ok
            -#> [1] TRUE
            -#> 
            -#> [[2]]$id
            -#> [1] "ba70c46d73707662b1e204a90fcda9f6"
            -#> 
            -#> [[2]]$rev
            -#> [1] "1-273ff17a938cb956cba21051ab428b95"
            -
            -

            json

            -
            #> $ok
            -#> [1] TRUE
            -
            strs <- as.character(parse_df(mtcars, "columns"))
            -db_create(dbname="bulkfromchr")
            -#> $ok
            -#> [1] TRUE
            -out <- bulk_create(strs, dbname="bulkfromchr")
            -out[1:2]
            -#> [[1]]
            -#> [[1]]$ok
            -#> [1] TRUE
            -#> 
            -#> [[1]]$id
            -#> [1] "ba70c46d73707662b1e204a90fce8c20"
            -#> 
            -#> [[1]]$rev
            -#> [1] "1-4b83d0ef53a28849a872d47ad03fef9a"
            -#> 
            -#> 
            -#> [[2]]
            -#> [[2]]$ok
            -#> [1] TRUE
            -#> 
            -#> [[2]]$id
            -#> [1] "ba70c46d73707662b1e204a90fce9bc1"
            -#> 
            -#> [[2]]$rev
            -#> [1] "1-c21bfa5425c67743f0826fd4b44b0dbf"
            -

            - - csl - an R client for Citation Style Language data + + PUT dataframes on your couch

- + + + It would be nice to easily push each row or column of a data.frame into CouchDB instead of having to convert them to JSON yourself, then push them into couch. I recently added the ability to push data.frames into couch using the normal `PUT /{db}` method, and added support for the couch bulk API. + +## Install + + +```r +install.packages("devtools") +devtools::install_github("sckott/sofa") +``` + + +```r +library("sofa") +``` + +## PUT /db + +You can write directly from a data.frame, either by rows or columns. First, rows: + + +``` +#> $ok +#> [1] TRUE +``` + +Create a database + + +```r +db_create(dbname="mtcarsdb") +#> $ok +#> [1] TRUE +``` + + +```r +out <- doc_create(mtcars, dbname="mtcarsdb", how="rows") +out[1:2] +#> $`Mazda RX4` +#> $`Mazda RX4`$ok +#> [1] TRUE +#> +#> $`Mazda RX4`$id +#> [1] "0063109bfb1c15765854cbc9525c3a7a" +#> +#> $`Mazda RX4`$rev +#> [1] "1-3946941c894a874697554e3e6d9bc176" +#> +#> +#> $`Mazda RX4 Wag` +#> $`Mazda RX4 Wag`$ok +#> [1] TRUE +#> +#> $`Mazda RX4 Wag`$id +#> [1] "0063109bfb1c15765854cbc9525c461d" +#> +#> $`Mazda RX4 Wag`$rev +#> [1] "1-273ff17a938cb956cba21051ab428b95" +``` + +Then by columns + + +```r +out <- doc_create(mtcars, dbname="mtcarsdb", how="columns") +out[1:2] +#> $mpg +#> $mpg$ok +#> [1] TRUE +#> +#> $mpg$id +#> [1] "0063109bfb1c15765854cbc9525d4f1f" +#> +#> $mpg$rev +#> [1] "1-4b83d0ef53a28849a872d47ad03fef9a" +#> +#> +#> $cyl +#> $cyl$ok +#> [1] TRUE +#> +#> $cyl$id +#> [1] "0063109bfb1c15765854cbc9525d57d3" +#> +#> $cyl$rev +#> [1] "1-c21bfa5425c67743f0826fd4b44b0dbf" +``` + +## Bulk API + +The bulk API should be faster for larger data.frames + + +``` +#> $ok +#> [1] TRUE +``` + +We'll use part of the diamonds dataset + + +```r +library("ggplot2") +dat <- diamonds[1:20000,] +``` + +Create a database + + +```r +db_create(dbname="bulktest") +#> $ok +#> [1] TRUE +``` + +Load by row (could instead do each column, see the `how` parameter), printing the time it takes + + +```r +system.time(out <- bulk_create(dat, dbname="bulktest")) +#> user system elapsed +#> 16.832 6.039 24.432 +``` + +The returned data is the same as with `doc_create()` + + +```r +out[1:2] +#> [[1]] +#> [[1]]$ok +#> [1] TRUE +#> +#> [[1]]$id +#> [1] "0063109bfb1c15765854cbc9525d8b8d" +#> +#> [[1]]$rev +#> [1] "1-f407fe4935af7fd17c101f13d3c81679" +#> +#> +#> [[2]] +#> [[2]]$ok +#> [1] TRUE +#> +#> [[2]]$id +#> [1] "0063109bfb1c15765854cbc9525d998b" +#> +#> [[2]]$rev +#> [1] "1-cf8b9a9dcdc026052a663d6fef8a36fe" +``` + +So that's 20,000 rows in not that much time, not bad.
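Under the hood, the couch bulk API is just `POST /{db}/_bulk_docs` with a JSON body holding a `docs` array, which is presumably what `bulk_create()` wraps. A minimal sketch with `httr` and `jsonlite`, assuming CouchDB on the default local port and the `bulktest` database from above:

```r
library("httr")
library("jsonlite")

# two tiny documents written in a single bulk request
docs <- list(docs = list(list(mpg = 21, cyl = 6), list(mpg = 22.8, cyl = 4)))
res <- POST("http://localhost:5984/bulktest/_bulk_docs",
            body = toJSON(docs, auto_unbox = TRUE),
            add_headers("Content-Type" = "application/json"))
content(res)
#> a list with one ok/id/rev element per document, like the output above
```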
+ +### not dataframes + +You can also pass in lists or vectors of json as character strings, e.g., + +_lists_ + + +``` +#> $ok +#> [1] TRUE +``` + + +```r +row.names(mtcars) <- NULL # get rid of row.names +lst <- parse_df(mtcars, tojson=FALSE) +db_create(dbname="bulkfromlist") +#> $ok +#> [1] TRUE +out <- bulk_create(lst, dbname="bulkfromlist") +out[1:2] +#> [[1]] +#> [[1]]$ok +#> [1] TRUE +#> +#> [[1]]$id +#> [1] "ba70c46d73707662b1e204a90fcd9bb8" +#> +#> [[1]]$rev +#> [1] "1-3946941c894a874697554e3e6d9bc176" +#> +#> +#> [[2]] +#> [[2]]$ok +#> [1] TRUE +#> +#> [[2]]$id +#> [1] "ba70c46d73707662b1e204a90fcda9f6" +#> +#> [[2]]$rev +#> [1] "1-273ff17a938cb956cba21051ab428b95" +``` + +_json_ + + +``` +#> $ok +#> [1] TRUE +``` + + +```r +strs <- as.character(parse_df(mtcars, "columns")) +db_create(dbname="bulkfromchr") +#> $ok +#> [1] TRUE +out <- bulk_create(strs, dbname="bulkfromchr") +out[1:2] +#> [[1]] +#> [[1]]$ok +#> [1] TRUE +#> +#> [[1]]$id +#> [1] "ba70c46d73707662b1e204a90fce8c20" +#> +#> [[1]]$rev +#> [1] "1-4b83d0ef53a28849a872d47ad03fef9a" +#> +#> +#> [[2]] +#> [[2]]$ok +#> [1] TRUE +#> +#> [[2]]$id +#> [1] "ba70c46d73707662b1e204a90fce9bc1" +#> +#> [[2]]$rev +#> [1] "1-c21bfa5425c67743f0826fd4b44b0dbf" +``` -
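If you want to double-check that everything landed, one option is to ask CouchDB's `_all_docs` view for just the document count. A small sketch with `httr`/`jsonlite`, again assuming the default local port and the `bulktest` database created above:

```r
library("httr")
library("jsonlite")

# limit = 0 returns no rows, just the total number of docs in the database
res <- GET("http://localhost:5984/bulktest/_all_docs", query = list(limit = 0))
fromJSON(content(res, "text"))$total_rows
#> should match the 20,000 rows written above
```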

CSL (Citation Style Language) is now used quite widely to specify citations in a standard fashion. csl is an R client for exploring CSL styles, and is inspired by the Ruby gem csl. For example, CSL data is returned by the PLOS Lagotto article-level metrics API (follow http://alm.plos.org/api/v5/articles?ids=10.1371%252Fjournal.pone.0025110&info=detail&source_id=crossref).

            - -

            Let me know if you have any feedback at the repo https://github.com/ropensci/csl

            - -

            Install

            -
            install.packages("devtools")
            -devtools::install_github("ropensci/csl")
            -
            library("csl")
            -
            -

            Load CSL style from a URL

            - -

You can load CSL styles from either a URL or a local file on your machine. First, from a URL. In this case from the Zotero style repository, for the American Journal of Political Science.

            -
            jps <- style_load('http://www.zotero.org/styles/american-journal-of-political-science')
            -
            -

            A list is returned, which you can index to various parts of the style specification.

            -
            jps$info
            -#> $title
            -#> [1] "American Journal of Political Science"
            -#> 
            -#> $title_short
            -#> [1] "AJPS"
            -#> 
            -#> $id
            -#> [1] "http://www.zotero.org/styles/american-journal-of-political-science"
            -#> 
            -#> $author
            -...
            -
            jps$title
            -#> [1] "American Journal of Political Science"
            -
            jps$citation_format
            -#> [1] "author-date"
            -
            jps$links_template
            -#> [1] "http://www.zotero.org/styles/american-political-science-association"
            -
            jps$editor
            -#> $editor
            -#> $editor$variable
            -#> [1] "editor"
            -#> 
            -#> $editor$delimiter
            -#> [1] ", "
            -#> 
            -#> 
            -#> $label
            -#> $label$form
            -...
            -
            jps$author
            -#> $author
            -#> $author$variable
            -#> [1] "author"
            -#> 
            -#> 
            -#> $label
            -#> $label$form
            -#> [1] "short"
            -#> 
            -#> $label$prefix
            -...
            -
            -

            Get raw XML

            - -

            You can also get raw XML if you'd rather deal with that format.

            -
            style_xml('http://www.zotero.org/styles/american-journal-of-political-science')
            -#> <?xml version="1.0" encoding="utf-8"?>
            -#> <style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" demote-non-dropping-particle="sort-only" default-locale="en-US">
            -#>   <info>
            -#>     <title>American Journal of Political Science</title>
            -#>     <title-short>AJPS</title-short>
            -#>     <id>http://www.zotero.org/styles/american-journal-of-political-science</id>
            -#>     <link href="http://www.zotero.org/styles/american-journal-of-political-science" rel="self"/>
            -#>     <link href="http://www.zotero.org/styles/american-political-science-association" rel="template"/>
            -#>     <link href="http://www.ajps.org/AJPS%20Style%20Guide.pdf" rel="documentation"/>
            -#>     <author>
            -...
            -
            -

            Get styles

            - -

            There is a GitHub repository of CSL styles at https://github.com/citation-style-language/styles-distribution. These don't come with the csl package, so you have to run get_styles() to get them on your machine. The default path is Sys.getenv("HOME")/styles, which for me is /Users/sacmac/styles. You can change where files are saved by using the path parameter.

            -
            get_styles()
            -#> 
            -#> Done! Files put in /Users/sacmac/styles
            -
            -
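If you'd rather keep the styles somewhere other than the default, the path parameter mentioned above controls where the files go. A quick sketch (the directory name here is just an example):

```r
# save the styles somewhere other than ~/styles
get_styles(path = "~/csl-styles")
```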

After getting styles locally you can load them just as we did with style_load(), but from your machine. However, since the file is local, we can make this easier by allowing just the name of the style, like so:

            -
            style_load("apa")
            -#> $info
            -#> $info$title
            -#> [1] "American Psychological Association 6th edition"
            -#> 
            -#> $info$title_short
            -#> [1] "APA"
            -#> 
            -#> $info$id
            -#> [1] "http://www.zotero.org/styles/apa"
            -#> 
            -...
            -
            -

If you are unsure whether a style exists, you can use style_exists()

            -
            style_exists("helloworld")
            -#> [1] FALSE
            -style_exists("acs-nano")
            -#> [1] TRUE
            -
            -

In addition, you can list the path for a single style, more than one, or all styles with styles()

            -
            styles("apa")
            -#> [1] "/Users/sacmac/styles/apa.csl"
            -
            -

            All of them, truncated for blog brevity

            -
            styles()
            -#> $independent
            -#>    [1] "academy-of-management-review"                                                         
            -#>    [2] "acm-sig-proceedings-long-author-list"                                                 
            -#>    [3] "acm-sig-proceedings"                                                                  
            -#>    [4] "acm-sigchi-proceedings-extended-abstract-format"                                      
            -#>    [5] "acm-sigchi-proceedings"                                                               
            -#>    [6] "acm-siggraph"                                                                         
            -#>    [7] "acs-nano"                                                                             
            -#>    [8] "acta-anaesthesiologica-scandinavica"                                                  
            -#>    [9] "acta-anaesthesiologica-taiwanica"                                                     
            -...
            -
            -

            Get locales

            - -

            In addition to styles, there is a GitHub repo for locales at https://github.com/citation-style-language/locales. These also don't come with the csl package, so you have to run get_locales() to get them on your machine. Same goes here for paths as above for styles.

            -
            get_locales()
            -#> 
            -#> Done! Files put in /Users/sacmac/locales
            -

            - - Elasticsearch backup and restore + + csl - an R client for Citation Style Language data

            - - -

            setup backup

            -
            curl -XPUT 'http://localhost:9200/_snapshot/my_backup/' -d '{
            -    "type": "fs",
            -    "settings": {
            -        "location": "/Users/sacmac/esbackups/my_backup",
            -        "compress": true
            -    }
            -}'
            -
            -

            create backup

            -
            http PUT "localhost:9200/_snapshot/my_backup/snapshot_2?wait_for_completion=true"
            -
            -

            get info on snapshot

            -
            http "localhost:9200/_snapshot/my_backup/snapshot_2"
            -
            -

            restore

            -
            curl -XPOST "localhost:9200/_snapshot/my_backup/snapshot_2/_restore"
            -
            -

            partial restore, including various options that can be used

            -
            curl -XPOST "localhost:9200/_snapshot/my_backup/snapshot_2/_restore" -d '{
            -    "indices": "index_1,index_2",
            -    "ignore_unavailable": "true",
            -    "include_global_state": false,
            -    "rename_pattern": "index_(.+)",
            -    "rename_replacement": "restored_index_$1"
            -}'
            -
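To see what is sitting in the repository after taking snapshots, you can list them all. A sketch from R with `httr`, assuming the same local node and the `my_backup` repository used above:

```r
library("httr")

# list every snapshot registered in the my_backup repository
res <- GET("http://localhost:9200/_snapshot/my_backup/_all")
content(res)$snapshots
#> a list with one entry per snapshot (name, state, indices, timing info)
```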
            + + + CSL (Citation Style Language) is used quite widely now to specify citations in a standard fashion. `csl` is an R client for exploring CSL styles, and is inspired by the Ruby gem [csl](https://github.com/inukshuk/csl-ruby). For example, csl is given back in the PLOS Lagotto article level metric API (follow [http://alm.plos.org/api/v5/articles?ids=10.1371%252Fjournal.pone.0025110&info=detail&source_id=crossref](http://alm.plos.org/api/v5/articles?ids=10.1371%252Fjournal.pone.0025110&info=detail&source_id=crossref)). + +Let me know if you have any feedback at the repo [https://github.com/ropensci/csl](https://github.com/ropensci/csl) + +## Install + + +```r +install.packages("devtools") +devtools::install_github("ropensci/csl") +``` + + +```r +library("csl") +``` + +## Load CSL style from a URL + +You can load CSL styles from either a URL or a local file on your machine. Firt, from a URL. In this case from the Zotero style repository, for the American Journal or Political Science. + + +```r +jps <- style_load('http://www.zotero.org/styles/american-journal-of-political-science') +``` + +A list is returned, which you can index to various parts of the style specification. + + +```r +jps$info +#> $title +#> [1] "American Journal of Political Science" +#> +#> $title_short +#> [1] "AJPS" +#> +#> $id +#> [1] "http://www.zotero.org/styles/american-journal-of-political-science" +#> +#> $author +... +``` + + +```r +jps$title +#> [1] "American Journal of Political Science" +``` + + +```r +jps$citation_format +#> [1] "author-date" +``` + + +```r +jps$links_template +#> [1] "http://www.zotero.org/styles/american-political-science-association" +``` + + +```r +jps$editor +#> $editor +#> $editor$variable +#> [1] "editor" +#> +#> $editor$delimiter +#> [1] ", " +#> +#> +#> $label +#> $label$form +... +``` + + +```r +jps$author +#> $author +#> $author$variable +#> [1] "author" +#> +#> +#> $label +#> $label$form +#> [1] "short" +#> +#> $label$prefix +... +``` + +## Get raw XML + +You can also get raw XML if you'd rather deal with that format. + + +```r +style_xml('http://www.zotero.org/styles/american-journal-of-political-science') +#> +#>