# Benchmark study: Modifying data within groups

This notebook uses the packages *data.table*, *dplyr*, *memisc*, and *rbenchmark*. If you want to run it on
your own computer, you may need to install them from [CRAN](https://cran.r-project.org) with
`install.packages(c("data.table","dplyr","memisc","rbenchmark"))`. (The packages are already installed in the notebook container.)

In [1]:
library(data.table)

In [2]:
library(memisc)

Loading required package: lattice
Loading required package: MASS

Attaching package: ‘memisc’

The following objects are masked from ‘package:stats’:

    contr.sum, contr.treatment, contrasts

The following object is masked from ‘package:base’:

    as.array



In [3]:
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:memisc’:

    collect, recode, rename, syms

The following object is masked from ‘package:MASS’:

    select

The following objects are masked from ‘package:data.table’:

    between, first, last

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [4]:
library(rbenchmark)

In [5]:
# Restore the benchmark inputs from two R data files in the working directory.
# The objects used below (BDataF, BDataT, BDTbl and SDataF, SDataT, SDTbl) are
# not created anywhere in this notebook, so presumably they come from these
# files -- TODO confirm which file provides which object.
load("BData.RData")
load("SData.RData")

In [6]:
# Benchmark 1: time four ways of adding the same four group-wise derived
# columns (groups defined by `a` and `b`) to the "B" data objects:
#   X1c  = X1 minus its group mean
#   X2c2 = squared deviation of X2 from its group mean
#   X3ca = absolute deviation of X3 from its group median
#   X4cm = X4 minus its group maximum
# The timed expressions are the object of the study, so they are kept exactly
# as written.
grouped_modification_benchmark_1 <- benchmark(
    # Competitor 1: within() with ave() computing each group statistic.
    # BDataF is presumably a plain data.frame -- TODO confirm.
    within =
        within(BDataF,{
            X1c <- X1 - ave(X1, a,b,FUN = mean)
            X2c2 <- (X2 - ave(X2, a,b,FUN = mean))^2
            X3ca <- abs(X3 - ave(X3,a,b,FUN = median))
            X4cm <- X4 - ave(X4,a,b,FUN = max)
        }),
    # Competitor 2: data.table `:=` with `by`. `:=` assigns in place, so
    # BDataT itself gains the columns and every replication after the first
    # overwrites existing columns rather than creating them.
    # NOTE(review): confirm that this is the intended thing to time.
    data.table =
        BDataT[,`:=`(X1c = X1 - mean(X1),
                     X2c2 = (X2 - mean(X2))^2,
                     X3ca = abs(X3 - median(X3)),
                     X4cm = X4 - max(X4)),
               by = .(a,b)],
    # Competitor 3: group_by() followed by mutate(). There is no ungroup(),
    # so the (discarded) result is still grouped by a and b.
    # BDTbl is presumably a tibble -- TODO confirm.
    `group_by + mutate` =
        BDTbl %>% group_by(a,b) %>%
        mutate(X1c = X1 - mean(X1),
               X2c2 = (X2 - mean(X2))^2,
               X3ca = abs(X3 - median(X3)),
               X4cm = X4 - max(X4)),
    # Competitor 4: withinGroups() (presumably from memisc -- TODO confirm)
    # with the grouping given as the formula ~a+b.
    withinGroups =
        withinGroups(BDataF, ~a+b, {
            X1c <- X1 - mean(X1)
            X2c2 <- (X2 - mean(X2))^2
            X3ca <- abs(X3 - median(X3))
            X4cm <- X4 - max(X4)
        }),
    # Reporting options: keep only the test label, user CPU time, and the
    # relative column; run each expression 100 times; do not sort the rows
    # (order = NULL); base the relative column on user.self.
    columns = c("test","user.self","relative"),
    replications = 100,
    order = NULL,
    relative = "user.self"
)

In [7]:
# Benchmark 2: the same four-way comparison as the first benchmark in this
# notebook, run on the "S" data objects (SDataF, SDataT, SDTbl). Each
# competitor adds four group-wise derived columns (groups defined by `a` and
# `b`):
#   X1c  = X1 minus its group mean
#   X2c2 = squared deviation of X2 from its group mean
#   X3ca = absolute deviation of X3 from its group median
#   X4cm = X4 minus its group maximum
# The timed expressions are the object of the study, so they are kept exactly
# as written.
grouped_modification_benchmark_2 <- benchmark(
    # Competitor 1: within() with ave() computing each group statistic.
    # SDataF is presumably a plain data.frame -- TODO confirm.
    within =
        within(SDataF,{
            X1c <- X1 - ave(X1, a,b,FUN = mean)
            X2c2 <- (X2 - ave(X2, a,b,FUN = mean))^2
            X3ca <- abs(X3 - ave(X3,a,b,FUN = median))
            X4cm <- X4 - ave(X4,a,b,FUN = max)
        }),
    # Competitor 2: data.table `:=` with `by`. `:=` assigns in place, so
    # SDataT itself gains the columns and every replication after the first
    # overwrites existing columns rather than creating them.
    # NOTE(review): confirm that this is the intended thing to time.
    data.table =
        SDataT[,`:=`(X1c = X1 - mean(X1),
                     X2c2 = (X2 - mean(X2))^2,
                     X3ca = abs(X3 - median(X3)),
                     X4cm = X4 - max(X4)),
               by = .(a,b)],
    # Competitor 3: group_by() followed by mutate(). There is no ungroup(),
    # so the (discarded) result is still grouped by a and b.
    # SDTbl is presumably a tibble -- TODO confirm.
    `group_by + mutate` =
        SDTbl %>% group_by(a,b) %>%
        mutate(X1c = X1 - mean(X1),
               X2c2 = (X2 - mean(X2))^2,
               X3ca = abs(X3 - median(X3)),
               X4cm = X4 - max(X4)),
    # Competitor 4: withinGroups() (presumably from memisc -- TODO confirm)
    # with the grouping given as the formula ~a+b.
    withinGroups =
        withinGroups(SDataF, ~a+b, {
            X1c <- X1 - mean(X1)
            X2c2 <- (X2 - mean(X2))^2
            X3ca <- abs(X3 - median(X3))
            X4cm <- X4 - max(X4)
        }),
    # Reporting options: keep only the test label, user CPU time, and the
    # relative column; run each expression 100 times; do not sort the rows
    # (order = NULL); base the relative column on user.self.
    columns = c("test","user.self","relative"),
    replications = 100,
    order = NULL,
    relative = "user.self"
)

In [8]:
# Write both benchmark result objects to a single .RData file so they can be
# reloaded later without re-running the timings.
save(
    list = c("grouped_modification_benchmark_1",
             "grouped_modification_benchmark_2"),
    file = "grouped-modification-benchmark.RData"
)