Skip to content

Commit

Permalink
moved DCF-2015 data to separate file
Browse files Browse the repository at this point in the history
  • Loading branch information
beanumber committed Aug 23, 2016
1 parent 21a57a1 commit b5fef9e
Show file tree
Hide file tree
Showing 13 changed files with 95 additions and 83 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mdsr
Title: Complement to Forthcoming Book: Modern Data Science with R
Version: 0.1.2.9007
Version: 0.1.2.9008
Date: 2016-06-23
Authors@R: as.person(c(
"Ben Baumer <ben.baumer@gmail.com> [aut, cre]",
Expand Down
25 changes: 19 additions & 6 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,19 @@
#' \item{WPct}{winning percentage}
#' \item{attendance}{number of fans in attendance}
#' \item{normAttend}{number of fans in attendance, relative to the team with
#'#'the highest attendance in this sample (the 2008 New York Yankees)}
#' the highest attendance in this sample (the 2008 New York Yankees)}
#' \item{payroll}{the sum of the salaries of the players on each team. Note
#'#'that this number is only an estimate of the actual team payroll -- and may
#'#'not even be a very good one. Salaries are accumulated from \code{\link[Lahman]{Salaries}}}
#' that this number is only an estimate of the actual team payroll -- and may
#' not even be a very good one. Salaries are accumulated from \code{\link[Lahman]{Salaries}}}
#' \item{metroPop}{the size of the team's home city's metropolitan population, according
#'#'to Wikipedia and the 2010 US Census}
#' to Wikipedia and the 2010 US Census}
#' \item{name}{the full name of the team}
#' }
#'
#' @seealso \code{\link[Lahman]{Teams}}
#'
#' @source The \code{\link[Lahman]{Teams}} data set and \url{https://en.wikipedia.org/wiki/List_of_Metropolitan_Statistical_Areas}
#' @source The \code{\link[Lahman]{Teams}} table from \code{\link{Lahman}}
#' and \url{https://en.wikipedia.org/wiki/List_of_Metropolitan_Statistical_Areas}
#'
"MLB_teams"

Expand Down Expand Up @@ -66,8 +67,20 @@
#'
"CIACountries"

#' election statistics
#' Election Statistics
#' @docType data
#' @format An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame})
#' with 117 rows and 13 columns.
#' \describe{
#' \item{Ward}{ward in which the precinct is located}
#' \item{Precinct}{precinct within the ward}
#' \item{Registered.Voters.at.7am}{number of voters registered as of 7am}
#' \item{Voters.Registering.at.Polls}{number of voters who registered at the polls}
#' \item{gdp}{Gross Domestic Product per capita ($/person), 2001}
#' \item{educ}{education spending (\% of GDP), 2206}
#' \item{roadways}{Roadways per unit area (km/sq km), 2085}
#' \item{net_users}{Fraction of Internet users (\% of population), 2153}
#' }
"Elections"

#' Cherry Blossom runs
Expand Down
15 changes: 5 additions & 10 deletions R/scidb.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
#' src_scidb
#'
#' @description Connect to the scidb server at Smith
#' @description Connect to the scidb server at Smith College.
#'
#' @param dbname the name of the database to which you want to connect
#' @param ... arguments passed to \code{\link[dplyr]{src_mysql}} or \code{\link[DBI]{dbConnect}}
#'
#' @details this is a public, read-only account. Any abuse will be considered a
#' @details This is a public, read-only account. Any abuse will be considered a
#' hostile act.
#'
#' @return For \code{\link{src_mysql}}, a \code{\link[dplyr]{src_mysql}} object
Expand All @@ -26,8 +25,6 @@ src_scidb <- function(dbname, ...) {
user = "mth292", password = "RememberPi")
}

#' Create a DBI connection to the scidb server at Smith
#'
#' @rdname src_scidb
#' @export
#'
Expand All @@ -47,9 +44,6 @@ dbConnect_scidb <- function(dbname, ...) {
user = "mth292", password = "RememberPi")
}

#' Create a string of arguments for the scidb server at Smith to be used on the
#' command line
#'
#' @rdname src_scidb
#' @export
#'
Expand All @@ -59,8 +53,9 @@ dbConnect_scidb <- function(dbname, ...) {
#' @seealso \code{\link[knitr]{opts_chunk}}
#' @examples
#'
#' library(knitr)
#' opts_chunk$set(engine.opts = mysql_scidb("airlines"))
#' if (require(knitr)) {
#' opts_chunk$set(engine.opts = mysql_scidb("airlines"))
#' }

mysql_scidb <- function(dbname, ...) {
paste("-h scidb.smith.edu -u mth292 -pRememberPi -t", dbname)
Expand Down
44 changes: 44 additions & 0 deletions data-raw/DCF-2015.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Build the data sets that originate from the DCF-2015 repository.
#
# All paths are relative (presumably to the package root -- run the script
# from there): raw CSV files are cached under inst/extdata/ and the processed
# objects are saved as xz-compressed .rda files under data/.

# The Votes/Parties pipelines use the %>% pipe plus rename() and mutate();
# attach dplyr so the script also runs in a fresh session.
library(dplyr)

# Elections
url <- "https://raw.githubusercontent.com/dtkaplan/DCF-2015/master/Book/Sections/Data/electionStatistics.csv"
download.file(url, destfile = "inst/extdata/electionStatistics.csv")

Elections <- readr::read_csv("inst/extdata/electionStatistics.csv")
save(Elections, file = "data/Elections.rda", compress = "xz")

# Cherry
url <- "https://raw.githubusercontent.com/dtkaplan/DCF-2015/master/Book/Sections/Data/Cherry-Blossom-Longitudinal.csv"
# Cache the remote file first, as the other sections do. Previously the URL
# was read with read.csv() and the result was immediately overwritten by
# whatever cherry.csv happened to be on disk, so the download was never used.
download.file(url, destfile = "inst/extdata/cherry.csv")
Cherry <- readr::read_csv("inst/extdata/cherry.csv")
save(Cherry, file = "data/Cherry.rda", compress = "xz")

# nciNetwork
# NOTE(review): this URL embeds an access token in its query string. Such
# tokens should not live in version control and may stop working -- confirm
# whether the file is reachable without it and drop the token if so.
url <- "https://raw.githubusercontent.com/dtkaplan/DCF-2015/master/Book/Sections/Data/nciNetwork.csv?token=ACpUw4Vh3a3YvWLDcGd9nLbnBX6ZiFUlks5XxNu_wA%3D%3D"
download.file(url, destfile = "inst/extdata/nciNetwork.csv")
Cancer <- readr::read_csv("inst/extdata/nciNetwork.csv")
save(Cancer, file = "data/Cancer.rda", compress = "xz")


# VotesS1-tally
# download.file("https://raw.githubusercontent.com/dtkaplan/DCF-2015/master/Book/Sections/Data/VotesS1-tally.csv", "inst/extdata/VotesS1-tally.csv")

# Votes: one row per (bill, name) pair. The first line of the CSV is skipped
# so that the second line (whose first cell is VOTE) supplies the header.
# stringsAsFactors is set explicitly because `bill` must be a factor for the
# levels() re-encoding below, and R >= 4.0 no longer creates factors by
# default.
Votes <- read.csv(
  "inst/extdata/VotesS1-tally.csv",
  skip = 1,
  stringsAsFactors = TRUE
) %>%
  rename(bill = VOTE) %>%
  tidyr::gather(key = "name", value = "vote", -bill) %>%
  # presumably undoes the dots read.csv() substitutes into column names:
  # ".." becomes ", " first, then each remaining "." becomes a space
  mutate(name = gsub("\\.\\.", ", ", name)) %>%
  mutate(name = gsub("\\.", " ", name))

# Declare the bill labels as latin1, then convert them to UTF-8.
Encoding(levels(Votes$bill)) <- "latin1"
levels(Votes$bill) <- iconv(levels(Votes$bill), from = "latin1", to = "UTF-8")

# Parties: the first two rows of the same file (labelled PARTY and VOTE in
# their first cell), transposed so that each column of the CSV becomes a row.
Parties <- read.csv(
  "inst/extdata/VotesS1-tally.csv",
  header = FALSE, nrows = 2, row.names = 1,
  stringsAsFactors = FALSE
) %>%
  t() %>%
  as.data.frame() %>%
  rename(party = PARTY, name = VOTE) %>%
  mutate(party = as.character(party), name = as.character(name)) %>%
  mutate(name = gsub("-", " ", name))

# Check that all names match. A bare setdiff() is silently discarded when the
# script is source()d, so report mismatches as a (non-fatal) warning.
unmatched <- setdiff(Parties$name, unique(Votes$name))
if (length(unmatched) > 0) {
  warning(
    "Names in Parties with no match in Votes: ",
    paste(unmatched, collapse = "; "),
    call. = FALSE
  )
}

save(Votes, file = "data/Votes.rda", compress = "xz")
save(Parties, file = "data/Parties.rda", compress = "xz")



27 changes: 0 additions & 27 deletions data-raw/DataComputing.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,6 @@ cat(create_roxygen(parse_file("https://raw.githubusercontent.com/DataComputing/D
load("data/Minneapolis2013.rda")
save(Minneapolis2013, file = "data/Minneapolis2013.rda", compress = "xz")


# NCI for graph
Cancer <- read.csv("inst/extdata/nciNetwork.csv")
save(Cancer, file = "data/Cancer.rda", compress = "xz")
download.file("https://github.com/DataComputing/DataComputing/raw/master/R/NetworkHelpers.R", "R/NetworkHelpers.R")

# WorldCities
download.file("https://github.com/DataComputing/DataComputing/raw/master/data/WorldCities.rda", "data/WorldCities.rda")
cat(create_roxygen(parse_file("https://raw.githubusercontent.com/DataComputing/DataComputing/master/man/WorldCities.Rd")), sep = "\n")
Expand All @@ -61,24 +55,3 @@ Encoding(WorldCities$name) <- "latin1"
WorldCities$name <- iconv(WorldCities$name, from = "latin1", to = "UTF-8")
save(WorldCities, file = "data/WorldCities.rda", compress = "xz")

# VotesS1-tally
# download.file("https://raw.githubusercontent.com/dtkaplan/DCF-2015/master/Book/Sections/Data/VotesS1-tally.csv", "inst/extdata/VotesS1-tally.csv")
Votes <- read.csv("inst/extdata/VotesS1-tally.csv", skip = 1) %>%
rename(bill = VOTE) %>%
tidyr::gather(key = "name", value = "vote", -bill) %>%
mutate(name = gsub("\\.\\.", ", ", name)) %>%
mutate(name = gsub("\\.", " ", name))
Encoding(levels(Votes$bill)) <- "latin1"
levels(Votes$bill) <- iconv(levels(Votes$bill), from = "latin1", to = "UTF-8")
Parties <- read.csv("inst/extdata/VotesS1-tally.csv",
header = FALSE, nrows = 2, row.names = 1,
stringsAsFactors = FALSE) %>%
t() %>%
as.data.frame() %>%
rename(party = PARTY, name = VOTE) %>%
mutate(party = as.character(party), name = as.character(name)) %>%
mutate(name = gsub("-", " ", name))
# check to see that all names match
setdiff(Parties$name, unique(Votes$name))
save(Votes, file = "data/Votes.rda", compress = "xz")
save(Parties, file = "data/Parties.rda", compress = "xz")
21 changes: 0 additions & 21 deletions data-raw/electionStatistics.R

This file was deleted.

Binary file modified data/Cancer.rda
Binary file not shown.
Binary file modified data/Cherry.rda
Binary file not shown.
Binary file modified data/Elections.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/Cancer.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 14 additions & 3 deletions man/Elections.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 6 additions & 5 deletions man/MLB_teams.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 5 additions & 9 deletions man/src_scidb.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit b5fef9e

Please sign in to comment.