Fix some spelling errors

ropensci · Sep 5, 2022 · 4c0f9e4 · 4c0f9e4
1 parent db425b0
commit 4c0f9e4
Show file tree

Hide file tree

Showing 12 changed files with 40 additions and 31 deletions.
diff --git a/R/db-setup-tools.R b/R/db-setup-tools.R
@@ -5,7 +5,7 @@
 #' user interactively selects the parts of GenBank to download (e.g. primates,
 #' plants, bacteria ...).
 #' This is an internal function so the download can be wrapped in `while()` to
-#' enable persistant downloading.
+#' enable persistent downloading.
 #' @details
 #' The downloaded files will appear in the restez filepath under downloads.
 #' @param db Database type, only 'nucleotide' currently available.

diff --git a/R/gb-setup-tools.R b/R/gb-setup-tools.R
@@ -37,7 +37,7 @@ flatfile_read <- function(flpth) {
 #' raw format, see ?charToRaw, in order to save on RAM.
 #' The raw_record contains the entire GenBank record in text format.
 #'
-#' Use `acc_filter` and max and min sequence lengths to minimise the size of the
+#' Use `acc_filter` and max and min sequence lengths to minimize the size of the
 #' database. All sequences have to be at least as long as min and less than or
 #' equal in length to max, unless max is NULL, in which case there is no maximum
 #' length. The final selection of sequences is the result of applying all
@@ -50,7 +50,7 @@ flatfile_read <- function(flpth) {
 #' the database as specified by `invert`.
 #' @param invert Logical vector of length 1; if TRUE, accessions in `acc_filter`
 #' will be excluded from the database; if FALSE, only accessions in `acc_filter`
-#' will be included in the databasse. Default FALSE.
+#' will be included in the database. Default FALSE.
 #' @return data.frame, or NULL if no records pass filters
 #' @family private
 gb_df_generate <- function(records, min_length=0, max_length=NULL,

diff --git a/man/db_create.Rd b/man/db_create.Rd
diff --git a/man/db_download_intern.Rd b/man/db_download_intern.Rd
diff --git a/man/gb_build.Rd b/man/gb_build.Rd
diff --git a/man/gb_df_generate.Rd b/man/gb_df_generate.Rd
diff --git a/vignettes/1_rodents.Rmd b/vignettes/1_rodents.Rmd
@@ -1,6 +1,6 @@
 ---
 title: "1. Build a database of all rodents"
-date: "2022-09-02"
+date: "2022-09-05"
 output: rmarkdown::html_vignette
 ---
 
@@ -37,11 +37,11 @@ db_create(min_length = 100, max_length = 1000)
 ```r
 restez_status()
 #> Checking setup status at  ...
-#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 #> Restez path ...
 #> ... Path '[RODENTS PATH]/restez'
 #> ... Does path exist? 'Yes'
-#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 #> Download ...
 #> ... Path '[RODENTS PATH]/restez/downloads'
 #> ... Does path exist? 'Yes'
@@ -50,7 +50,7 @@ restez_status()
 #> ... GenBank division selections 'Rodent'
 #> ... GenBank Release 251
 #> ... Last updated '2022-08-30 20:55:13'
-#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+#> ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
 #> Database ...
 #> ... Path '[RODENTS PATH]/restez/sql_db'
 #> ... Does path exist? 'Yes'

diff --git a/vignettes/2_search_and_fetch.Rmd b/vignettes/2_search_and_fetch.Rmd
@@ -1,6 +1,6 @@
 ---
 title: "2. How to search for and fetch sequences"
-date: "2022-09-02"
+date: "2022-09-05"
 output: rmarkdown::html_vignette
 ---
 
@@ -55,7 +55,7 @@ system.time(expr = {
   coi_sequences <- gb_fasta_get(id = accessions)
   })
 #>    user  system elapsed 
-#>   0.202   0.100   0.257
+#>   0.203   0.106   0.288
 # time via Entrez
 system.time(expr = {
   coi_sequences_p1 <- rentrez::entrez_fetch(db = 'nucleotide',
@@ -75,7 +75,7 @@ system.time(expr = {
                                             rettype = 'fasta')
   })
 #>    user  system elapsed 
-#>   0.051   0.007   5.425
+#>   0.050   0.008   5.252
 ```
 <!-- Below is no longer relevant now that the size of sequences in the db has been limited.
 ## Missing

diff --git a/vignettes/3_parsing.Rmd b/vignettes/3_parsing.Rmd
@@ -172,33 +172,42 @@ You can try out the above functions yourself on any sequence record by downloadi
 library(restez)
 restez_path_set(rodents_path)
 (rand_id <- sample(suppressWarnings(list_db_ids()), 1))
-#> [1] "AB007601"
+#> [1] "AB001427"
 record <- gb_record_get(rand_id)
 (gb_extract(record = record, what = 'features'))
 #> [[1]]
 #> [[1]]$type
 #> [1] "source"
 #> 
 #> [[1]]$location
-#> [1] "1..536"
+#> [1] "1..510"
 #> 
 #> [[1]]$organism
-#> [1] "Rattus norvegicus"
+#> [1] "Mus musculus"
 #> 
 #> [[1]]$mol_type
 #> [1] "mRNA"
 #> 
 #> [[1]]$strain
-#> [1] "Wistar"
+#> [1] "ICR"
 #> 
 #> [[1]]$db_xref
-#> [1] "taxon:10116misc_feature    <1..>536"
+#> [1] "taxon:10090"
+#> 
+#> [[1]]$dev_stage
+#> [1] "4 weekCDS             <1..>510"
+#> 
+#> [[1]]$codon_start
+#> [1] "1"
 #> 
 #> [[1]]$product
-#> [1] "sproutin"
+#> [1] "motor domain of KIF11"
+#> 
+#> [[1]]$protein_id
+#> [1] "BAA22387.1"
 #> 
-#> [[1]]$note
-#> [1] "neuronal diferentiation factor"
+#> [[1]]$translation
+#> [1] "IFTYGQTGTGKTFTMEGERSPNEVCTWEEGPLAGIIPRTLHQIFEKLTDNGTEFSVKVSLLEIYNEELFDLLSPSSDVSERLQMFDDPRNKRGVIIKDLEEITVHNKDEVYQILEKGAAKRTTAATLMNAYSSRSHSVFSVTIHMKETTIDGEELVKIGKLNLVDLAGSE"
 ```
 
 ## Next up

diff --git a/vignettes/4_phylotar.Rmd b/vignettes/4_phylotar.Rmd
@@ -5,7 +5,7 @@ output: rmarkdown::html_vignette
 
 
 
-In this tutorial we will showcase how a `restez` database can be used to speed up a [phylotaR](https://github.com/ropensci/phylotaR) run. `phylotaR` runs an automated pipeline for identifying ortholgous gene clusters as the first step in a phylogenetic analysis. A user provides a taxonomic identity and the pipeline downloads all relevant sequences and identifies clusters using a local-alignment search tool. For more information on `phylotaR` see its [published article](https://doi.org/10.3390/life8020020).
+In this tutorial we will showcase how a `restez` database can be used to speed up a [phylotaR](https://github.com/ropensci/phylotaR) run. `phylotaR` runs an automated pipeline for identifying orthologous gene clusters as the first step in a phylogenetic analysis. A user provides a taxonomic identity and the pipeline downloads all relevant sequences and identifies clusters using a local-alignment search tool. For more information on `phylotaR` see its [published article](https://doi.org/10.3390/life8020020).
 
 By using `restez` in conjunction with `phylotaR`, we will not only be saving time, but also improving the chances of a successful `phylotaR` run -- often NCBI Entrez limits the number of requests or even rejects requests from IP addresses that are making too many. Note, however, that the gains in using `restez` with `phylotaR` only make sense if you make use of the `restez` database multiple times or if you wish to radically increase the maximum number of sequences to download per taxon (by default it is only 3,000). Also, note that using a `restez` database does not currently eliminate the need for an internet connection. `phylotaR` still needs to look up taxonomic information and must also identify relevant sequence IDs using Entrez (this may change in the future as `restez` develops).
 
@@ -46,18 +46,18 @@ setup(wd = wd, txid = txid, ncbi_dr = ncbi_dr, mxsql = 500)
 # run just the first two stages for this demonstration
 taxise_run(wd)
 #> --------------------------------------------
-#> Starting stage TAXISE: [2022-09-02 06:46:52]
+#> Starting stage TAXISE: [2022-09-05 14:26:07]
 #> --------------------------------------------
 #> Searching taxonomic IDs ...
 #> Downloading taxonomic records ...
 #> . [1-28]
 #> Generating taxonomic dictionary ...
 #> ---------------------------------------------
-#> Completed stage TAXISE: [2022-09-02 06:46:54]
+#> Completed stage TAXISE: [2022-09-05 14:26:09]
 #> ---------------------------------------------
 download_run(wd)
 #> ----------------------------------------------
-#> Starting stage DOWNLOAD: [2022-09-02 06:46:54]
+#> Starting stage DOWNLOAD: [2022-09-05 14:26:09]
 #> ----------------------------------------------
 #> Identifying suitable clades ...
 #> Identified [1] suitable clades.
@@ -68,7 +68,7 @@ download_run(wd)
 #> . . Getting [100 sqs] from restez database...
 #> Successfully retrieved [100 sqs] in total.
 #> -----------------------------------------------
-#> Completed stage DOWNLOAD: [2022-09-02 06:46:58]
+#> Completed stage DOWNLOAD: [2022-09-05 14:26:14]
 #> -----------------------------------------------
 ```
 

diff --git a/vignettes/4_phylotar.Rmd.orig b/vignettes/4_phylotar.Rmd.orig
@@ -10,7 +10,7 @@ knitr::opts_chunk$set(
 )
 ```
 
-In this tutorial we will showcase how a `restez` database can be used to speed up a [phylotaR](https://github.com/ropensci/phylotaR) run. `phylotaR` runs an automated pipeline for identifying ortholgous gene clusters as the first step in a phylogenetic analysis. A user provides a taxonomic identity and the pipeline downloads all relevant sequences and identifies clusters using a local-alignment search tool. For more information on `phylotaR` see its [published article](https://doi.org/10.3390/life8020020).
+In this tutorial we will showcase how a `restez` database can be used to speed up a [phylotaR](https://github.com/ropensci/phylotaR) run. `phylotaR` runs an automated pipeline for identifying orthologous gene clusters as the first step in a phylogenetic analysis. A user provides a taxonomic identity and the pipeline downloads all relevant sequences and identifies clusters using a local-alignment search tool. For more information on `phylotaR` see its [published article](https://doi.org/10.3390/life8020020).
 
 By using `restez` in conjunction with `phylotaR`, we will not only be saving time, but also improving the chances of a successful `phylotaR` run -- often NCBI Entrez limits the number of requests or even rejects requests from IP addresses that are making too many. Note, however, that the gains in using `restez` with `phylotaR` only make sense if you make use of the `restez` database multiple times or if you wish to radically increase the maximum number of sequences to download per taxon (by default it is only 3,000). Also, note that using a `restez` database does not currently eliminate the need for an internet connection. `phylotaR` still needs to look up taxonomic information and must also identify relevant sequence IDs using Entrez (this may change in the future as `restez` develops).
 

diff --git a/vignettes/restez.Rmd b/vignettes/restez.Rmd
@@ -62,7 +62,7 @@ After the download has completed, we need to use `db_create()` to create our loc
 db_create()
 ```
 
-`db_create()` allows a user to specify minimum and maximum sequence lengths. It's always a good idea to limit the number of sequences in a database so that look-up times are faster. If you know you are only interested in certain lengths of sequences it is a good idea to limit the sequences in the database at this stage. This can also be done with the `acc_filter` argument, as described in the ["Tips and Tricks" vigenette](https://docs.ropensci.org/restez/articles/5_tips_and_tricks.html). You can always run `db_create()` again to change the limits. You will simply need to delete the original database first with `db_delete()`.
+`db_create()` allows a user to specify minimum and maximum sequence lengths. It's always a good idea to limit the number of sequences in a database so that look-up times are faster. If you know you are only interested in certain lengths of sequences it is a good idea to limit the sequences in the database at this stage. This can also be done with the `acc_filter` argument, as described in the ["Tips and Tricks" vignette](https://docs.ropensci.org/restez/articles/5_tips_and_tricks.html). You can always run `db_create()` again to change the limits. You will simply need to delete the original database first with `db_delete()`.
 
 ## 1.4 Checking the setup
 
@@ -115,7 +115,7 @@ Additionally, for more flexibility and options for extracting sequence record in
 
 # 3. Entrez
 
-Entrez wrappers are part of the `restez` package. These allow a user to make use of the local GenBank using functions that were built for [`rentrez`](https://github.com/ropensci/rentrez). This minimises the amount of coding changes required for any Entrez dependent code.
+Entrez wrappers are part of the `restez` package. These allow a user to make use of the local GenBank using functions that were built for [`rentrez`](https://github.com/ropensci/rentrez). This minimizes the amount of coding changes required for any Entrez dependent code.
 
 >Currently, only `entrez_fetch()` is available with restez and only text formatted rettypes are allowed.