Skip to content

Commit

Permalink
Merge pull request #7 from mrc-ide/develop
Browse files Browse the repository at this point in the history
Merge Develop into main and add vcf2long() function to pkg
  • Loading branch information
shaziaruybal committed Dec 14, 2023
2 parents a037fc4 + 4035fc1 commit e7c46e3
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 14 deletions.
14 changes: 0 additions & 14 deletions .Rproj.user/shared/notebooks/paths

This file was deleted.

7 changes: 7 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,10 @@ BugReports: https://github.com/mrc-ide/PGEhammer/issues
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
Imports:
dplyr,
rlang,
stringr,
tibble,
tidyr,
vcfR
14 changes: 14 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Generated by roxygen2: do not edit by hand

export(square)
export(vcf2long)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,relocate)
importFrom(dplyr,rowwise)
importFrom(dplyr,ungroup)
importFrom(rlang,.data)
importFrom(stringr,str_split)
importFrom(tibble,rownames_to_column)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,unnest)
importFrom(vcfR,extract.gt)
importFrom(vcfR,is.biallelic)
60 changes: 60 additions & 0 deletions R/data_wrangle_vcf2long.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#------------------------------------------------
#' @title Convert vcf to long format
#'
#' @description Convert a vcf into a long format data frame with sample ID,
#'   locus, alleles and read counts for each allele.
#'
#' @details Read counts are taken from the `AD` element of the vcf. Each
#'   comma-separated `AD` entry is split into one row per value, and the
#'   values are labelled `allele-1`, `allele-2`, ... in the order in which
#'   they appear within each sample and locus. A warning is raised if any
#'   locus in the vcf is not bi-allelic.
#'
#' @param vcf object of class vcfR
#'
#' @return A long format data frame with one row per sample, locus and
#'   allele, and columns `sample_id`, `locus`, `allele` and `read_count`
#'   (numeric).
#'
#' @importFrom vcfR extract.gt is.biallelic
#' @importFrom tibble rownames_to_column
#' @importFrom tidyr pivot_longer unnest
#' @importFrom dplyr rowwise mutate group_by relocate n ungroup
#' @importFrom stringr str_split
#' @importFrom rlang .data
#' @export
#' @examples
#' \dontrun{
#' vcf <- vcfR::read.vcfR("path/to/file.vcf.gz")
#' long_df <- vcf2long(vcf)
#' }

vcf2long <- function(vcf) {
  # check inputs
  assert_class(vcf, "vcfR")

  # print message to console
  message("Converting from vcf to long format...")

  # extract allele counts; transposed so that each row is a sample and each
  # column is a locus
  ad <- t(extract.gt(vcf, element = "AD"))

  # make df and into long format
  # (column selections use strings: `.data` is deprecated inside tidyselect
  # arguments, and strings avoid "no visible binding" notes in R CMD check)
  counts_df <- ad |>
    as.data.frame() |>
    rownames_to_column("sample_id") |>
    pivot_longer(
      cols = -"sample_id",
      names_to = "locus",
      values_to = "read_count"
    )

  # unnest read_count
  long_df <- counts_df |>
    # split all read count values; str_split() is vectorized and returns one
    # character vector per row, so no row-wise evaluation is needed
    mutate(read_count = str_split(.data$read_count, ",")) |>
    unnest(cols = "read_count") |>
    # make read_count numeric
    mutate(read_count = as.numeric(.data$read_count)) |>
    group_by(.data$sample_id, .data$locus) |>
    # create new variable 'allele', numbered within each sample and locus
    mutate(allele = paste0("allele-", seq_len(n()))) |>
    ungroup() |>
    relocate("allele", .before = "read_count")

  message("Reformatting complete.")

  # Check if any loci are not biallelic and record how many
  n_not_biallelic <- sum(!is.biallelic(vcf), na.rm = TRUE)

  # If the vcf is not biallelic, display a warning message
  if (n_not_biallelic > 0) {
    warning("Your vcf is not all bi-allelic. Make sure to double check if this is not expected.")
  }

  long_df
}
14 changes: 14 additions & 0 deletions man/vcf2long.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions tests/testthat/test-vcf2long.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
test_that("vcf2long errors when input is not a vcfR object", {
  # A bare number is not of class vcfR, so the input check must reject it
  expect_error(vcf2long(3))
})

0 comments on commit e7c46e3

Please sign in to comment.