/
groupAddAssign.R
166 lines (155 loc) · 7.67 KB
/
groupAddAssign.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#' Add animals to an existing breeding group or forms groups:
#'
## Copyright(c) 2017-2020 R. Mark Sharp
## This file is part of nprcgenekeepr
#' Part of Group Formation
#'
#' @description{
#' \code{groupAddAssign} finds the largest group that can be formed by adding
#' unrelated animals from a set of candidate IDs to an existing group, to a new
#' group it has formed from a set of candidate IDs or if more than 1 group
#' is desired, it finds the set of groups with the largest average size.
#'
#' The function implements a maximal independent set (MIS) algorithm to find
#' groups of unrelated animals. A set of animals may have many different MISs of
#' varying sizes, and finding the largest would require traversing all possible
#' combinations of animals. Since this could be very time consuming, this
#' algorithm produces a random sample of the possible MISs, and selects from
#' these. The size of the random sample is determined by the specified number
#' of iterations.
#' }
#'
#' @return A list with list items \code{group}, \code{score} and optionally
#' \code{groupKin}.
#' The list item \code{group} contains a list of the best group(s) produced
#' during the simulation.
#' The list item \code{score} provides the score associated with the group(s).
#' The list item \code{groupKin} contains the subset of the kinship matrix
#' that is specific for each group formed.
#'
#' @examples
#' \donttest{
#' library(nprcgenekeepr)
#' examplePedigree <- nprcgenekeepr::examplePedigree
#' breederPed <- qcStudbook(examplePedigree, minParentAge = 2,
#' reportChanges = FALSE,
#' reportErrors = FALSE)
#' focalAnimals <- breederPed$id[!(is.na(breederPed$sire) &
#' is.na(breederPed$dam)) &
#' is.na(breederPed$exit)]
#' ped <- setPopulation(ped = breederPed, ids = focalAnimals)
#' trimmedPed <- trimPedigree(focalAnimals, breederPed)
#' probands <- ped$id[ped$population]
#' ped <- trimPedigree(probands, ped, removeUninformative = FALSE,
#' addBackParents = FALSE)
#' geneticValue <- reportGV(ped, guIter = 50, # should be >= 1000
#' guThresh = 3,
#' byID = TRUE,
#' updateProgress = NULL)
#' trimmedGeneticValue <- reportGV(trimmedPed, guIter = 50, # should be >= 1000
#' guThresh = 3,
#' byID = TRUE,
#' updateProgress = NULL)
#' candidates <- trimmedPed$id[trimmedPed$birth < as.Date("2013-01-01") &
#' !is.na(trimmedPed$birth) &
#' is.na(trimmedPed$exit)]
#' haremGrp <- groupAddAssign(candidates = candidates,
#' kmat = trimmedGeneticValue[["kinship"]],
#' ped = trimmedPed,
#' iter = 10, # should be >= 1000
#' numGp = 6,
#' harem = TRUE)
#' haremGrp$group
#' sexRatioGrp <- groupAddAssign(candidates = candidates,
#' kmat = trimmedGeneticValue[["kinship"]],
#' ped = trimmedPed,
#' iter = 10, # should be >= 1000
#' numGp = 6,
#' sexRatio = 9)
#' sexRatioGrp$group
#' }
#' @param candidates Character vector of IDs of the animals available for
#' use in forming the groups. The animals that may be present in
#' \code{currentGroups} are not included within \code{candidates}.
#' @param currentGroups List of character vectors of IDs of animals currently
#' assigned to groups.
#' Defaults to a list with character(0) in each sublist element (one for each
#' group being formed) assuming no groups are prepopulated.
#' @param kmat Numeric matrix of pairwise kinship values. Rows and columns
#' are named with animal IDs.
#' @param ped Dataframe that is the `Pedigree`. It contains pedigree
#' information including the IDs listed in \code{candidates}.
#' @param threshold Numeric value indicating the minimum kinship level to be
#' considered in group formation. Pairwise kinship below this level will be
#' ignored. The default value is 0.015625.
#' @param ignore List of character vectors representing the sex combinations
#' to be ignored. If provided, the vectors in the list specify if pairwise
#' kinship should be ignored between certain sexes.
#' Default is to ignore all pairwise kinship between females.
#' @param minAge Integer value indicating the minimum age to consider in group
#' formation. Pairwise kinships involving an animal of this age or younger will
#' be ignored. Default is 1 year.
#' @param iter Integer indicating the number of times to perform the random
#' group formation process. Default value is 1000 iterations.
#' @param numGp Integer value indicating the number of groups that should be
#' formed from the list of IDs. Default is 1.
#' @param harem Logical variable when set to \code{TRUE}, the formed groups
#' have a single male at least \code{minAge} old.
#' @param sexRatio Numeric value indicating the ratio of females to males x
#' from 0.5 to 20 by increments of 0.5.
#' @param withKin Logical variable when set to \code{TRUE}, the kinship
#' matrix for the group is returned along with the group and score.
#' Defaults to not return the kinship matrix. This maintains compatibility with
#' earlier versions.
#' @param updateProgress Function or NULL. If this function is defined, it
#' will be called during each iteration to update a
#' \code{shiny::Progress} object.
#'
#' @export
groupAddAssign <- function(candidates, currentGroups = list(character(0)), kmat,
ped,
threshold = 0.015625, ignore = list(c("F", "F")),
minAge = 1, iter = 1000,
numGp = 1, harem = FALSE,
sexRatio = 0, withKin = FALSE,
updateProgress = NULL) {
if (length(currentGroups) > numGp)
stop(paste0("Cannot have more groups with seed animals than number of ",
"groups to be formed."))
kmat <- filterKinMatrix(union(candidates, unlist(currentGroups)), kmat)
kin <- getAnimalsWithHighKinship(kmat, ped, threshold, currentGroups, ignore,
minAge)
# Filtering out candidates related to current group members
conflicts <- unique(c(unlist(kin[unlist(currentGroups)]),
unlist(currentGroups)))
candidates <- setdiff(candidates, conflicts)
kin <- addAnimalsWithNoRelative(kin, candidates)
if (harem) {
if (length(getPotentialSires(candidates, minAge, ped)) < numGp) {
stop(paste0("User selected to form ", numGp, " harems with only ",
length(getPotentialSires(candidates, minAge, ped)),
" males at least ",
minAge, " years old in the list of candidates."))
}
}
# Starting the group assignment simulation
savedScore <- -1
savedGroupMembers <- list()
for (k in 1:iter) {
groupMembers <- fillGroupMembers(candidates, currentGroups, kin, ped, harem,
minAge, numGp, sexRatio)
# Score the resulting groups
score <- min(vapply(groupMembers, length, integer(1)))
if (score > savedScore) {
savedGroupMembers <- groupMembers
savedScore <- score
}
# Updating the progress bar, if applicable
if (!is.null(updateProgress)) {
updateProgress()
}
}
savedGroupMembers <- addGroupOfUnusedAnimals(savedGroupMembers, candidates,
ped, minAge, harem)
groupMembersReturn(savedGroupMembers, savedScore, withKin, kmat)
}