# <center>R: Porównanie algorytmów</center>
---
## 1. Konstrukcja modeli i mierników parametrów
### 1.1 Biblioteki i edycja danych
#### 1.1.1 Biblioteki

In [1]:
library(dplyr)
library(data.table)
library(lme4)
library(nlme)
library(mgcv)
library(profmem)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Attaching package: 'data.table'

The following objects are masked from 'package:dplyr':

    between, first, last

Loading required package: Matrix

Attaching package: 'nlme'

The following object is masked from 'package:lme4':

    lmList

The following object is masked from 'package:dplyr':

    collapse

This is mgcv 1.8-28. For overview type 'help("mgcv-package")'.


#### 1.1.2 Wczytanie i edycja danych

In [2]:
# Read the benchmark data and turn the grouping column `fac` into a factor
# (updated in place with data.table's `:=`).
data_set <- fread("BIG.csv")
data_set[, fac := as.factor(fac)]

# Preview the first rows.
head(data_set)

y,x0,x1,x2,x3,f,f0,f1,f2,f3,fac
7.260973,0.68785741,0.547054,0.6354567,0.6111791,10.956922,1.66169056,2.986517,3.3087140176,0,1
12.88555,0.49219261,0.2736482,0.4806589,0.8204239,12.554521,1.99939842,1.728573,2.8265497071,0,2
20.10515,0.34511557,0.6216723,0.315391,0.9487858,21.393659,1.76787094,3.46719,7.1585974825,0,3
20.716234,0.99504991,0.9231926,0.7437992,0.6957668,20.553333,0.03110107,6.336871,2.1853611574,0,4
15.368211,0.69526717,0.8153862,0.2709855,0.3732315,18.197753,1.63533361,5.107819,8.4546006591,0,1
8.857683,0.01070004,0.1109302,0.9682905,0.4825256,7.315757,0.06721765,1.248397,0.0001426359,0,2


### 1.2 Funkcje
#### 1.2.1 Funkcja tworząca model

In [3]:
# Fit the linear mixed model y ~ x0 + x1 + x2 with a random intercept for
# each level of `fac`, using the fitting function selected by `model`.
#
# Args:
#   model: a single string naming the backend:
#            "lme4" -> lmer(), random part written as (1 | fac)
#            "nlme" -> lme(),  random part written as random = ~ 1 | fac
#            "bam"  -> bam(),  random part written as s(fac, bs = "re")
#
# Returns:
#   The fitted model object returned by the selected fitting function.
#
# Errors:
#   Stops with an informative message when `model` is not a single string or
#   is not one of the three supported names. (An unknown name used to fall
#   through every branch and silently return NULL, so a typo produced a
#   meaningless "benchmark" of an empty call.)
#
# Note: the data are taken from the global object `data_set`.
LMM <- function(model) {
  if (!is.character(model) || length(model) != 1L || is.na(model)) {
    stop(
      "`model` must be a single string: \"lme4\", \"nlme\" or \"bam\".",
      call. = FALSE
    )
  }

  switch(model,
    lme4 = lmer(data = data_set, y ~ x0 + x1 + x2 + (1 | fac)),
    nlme = lme(data = data_set, y ~ x0 + x1 + x2, random = ~ 1 | fac),
    bam = bam(data = data_set, y ~ x0 + x1 + x2 + s(fac, bs = "re")),
    stop(
      "Unknown model \"", model,
      "\"; expected \"lme4\", \"nlme\" or \"bam\".",
      call. = FALSE
    )
  )
}

#### 1.2.2 Funkcja sprawdzająca czas

In [4]:
# Measure how long it takes to fit a model, repeated `n` times.
#
# Args:
#   model: model name passed unchanged to LMM().
#   n:     number of repetitions (a non-negative whole number).
#
# Returns:
#   A numeric vector of length `n` with the wall-clock duration of every fit
#   in seconds, rounded to 4 decimal places. For n = 0 the result is
#   numeric(0).
check_time <- function(model, n) {
  # Preallocate instead of growing the vector with c() on every iteration.
  times <- numeric(n)

  # seq_len() yields an empty sequence for n = 0, whereas 1:0 would run twice.
  for (i in seq_len(n)) {
    start_time <- Sys.time()
    LMM(model)
    end_time <- Sys.time()

    # Subtracting two POSIXct values lets R pick the unit automatically
    # (seconds, minutes, ...), so a slow fit could be stored in minutes next
    # to fast fits stored in seconds. Force seconds explicitly.
    elapsed <- difftime(end_time, start_time, units = "secs")
    times[i] <- round(as.numeric(elapsed), 4)
  }

  times
}

#### 1.2.3 Funkcja mierząca ilość zużytej pamięci RAM

In [5]:
# Report the memory recorded by profmem() while fitting a model.
#
# Args:
#   model: model name passed unchanged to LMM().
#
# Returns:
#   The sum of all non-missing `bytes` entries of the profmem() result,
#   multiplied by 1e-6 (bytes -> MB) and rounded to 4 decimal places.
check_RAM <- function(model) {
  allocations <- profmem(LMM(model))

  # Entries with a missing size are left out of the total.
  total_bytes <- sum(allocations$bytes, na.rm = TRUE)

  round(total_bytes * 1e-6, 4)
}

## 2. Symulacje
### 2.1 Podsumowanie modelu
#### 2.1.1 "lme4"

In [6]:
 # Fit the model through the "lme4" branch of LMM() and print its summary.
 summary(LMM("lme4"))

Linear mixed model fit by REML ['lmerMod']
Formula: y ~ x0 + x1 + x2 + (1 | fac)
   Data: data_set

REML criterion at convergence: 5088569

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.6139 -0.6155  0.0215  0.6592  4.2157 

Random effects:
 Groups   Name        Variance Std.Dev.
 fac      (Intercept) 15.038   3.878   
 Residual              9.494   3.081   
Number of obs: 1000000, groups:  fac, 4

Fixed effects:
             Estimate Std. Error  t value
(Intercept) 15.161178   1.938967    7.819
x0           0.007555   0.010672    0.708
x1           5.994844   0.010665  562.101
x2          -5.597065   0.010678 -524.159

Correlation of Fixed Effects:
   (Intr) x0     x1    
x0 -0.003              
x1 -0.003  0.001       
x2 -0.003  0.001  0.001

#### 2.1.2 "nlme"

In [7]:
# Fit the model through the "nlme" branch of LMM() and print its summary.
summary(LMM("nlme"))

Linear mixed-effects model fit by REML
 Data: data_set 
      AIC     BIC   logLik
  5088581 5088651 -2544284

Random effects:
 Formula: ~1 | fac
        (Intercept) Residual
StdDev:    3.877112  3.08118

Fixed effects: y ~ x0 + x1 + x2 
                Value Std.Error     DF   t-value p-value
(Intercept) 15.161161 1.9385805 999993    7.8208   0.000
x0           0.007555 0.0106719 999993    0.7079   0.479
x1           5.994844 0.0106651 999993  562.1015   0.000
x2          -5.597065 0.0106782 999993 -524.1588   0.000
 Correlation: 
   (Intr) x0     x1    
x0 -0.003              
x1 -0.003  0.001       
x2 -0.003  0.001  0.001

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-4.61391621 -0.61545985  0.02150786  0.65921285  4.21567231 

Number of Observations: 1000000
Number of Groups: 4 

#### 2.1.3 "bam"

In [8]:
# Fit the model through the "bam" branch of LMM() and print its summary.
summary(LMM("bam"))


Family: gaussian 
Link function: identity 

Formula:
y ~ x0 + x1 + x2 + s(fac, bs = "re")

Parametric coefficients:
             Estimate Std. Error  t value Pr(>|t|)    
(Intercept) 15.161161   1.927542    7.866 3.68e-15 ***
x0           0.007555   0.010672    0.708    0.479    
x1           5.994844   0.010665  562.101  < 2e-16 ***
x2          -5.597065   0.010678 -524.159  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
       edf Ref.df      F p-value    
s(fac)   3      3 395842  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =   0.64   Deviance explained =   64%
fREML = 2.5443e+06  Scale est. = 9.4937    n = 1000000

### 2.2 Sprawdzenie czasu konstrukcji modelu
#### 2.2.1 100 razy z użyciem funkcji "check_time(model, n)"

In [9]:
# Time 100 fits for each of the three backends.
LMM_lme4_times <- check_time("lme4", 100)
LMM_nlme_times <- check_time("nlme", 100)
LMM_bam_times <- check_time("bam", 100)

# One-row table with the mean and standard deviation of the fit times,
# rounded to 4 decimal places.
data.frame(
  lme4_mean = round(mean(LMM_lme4_times), 4),
  lme4_sd = round(sd(LMM_lme4_times), 4),
  nlme_mean = round(mean(LMM_nlme_times), 4),
  nlme_sd = round(sd(LMM_nlme_times), 4),
  bam_mean = round(mean(LMM_bam_times), 4),
  bam_sd = round(sd(LMM_bam_times), 4)
)

lme4_mean,lme4_sd,nlme_mean,nlme_sd,bam_mean,bam_sd
9.5139,0.3001,11.7934,0.5152,3.8094,0.2296


### 2.3 Sprawdzenie zużytej pamięci RAM
#### 2.3.1 Z użyciem funkcji "check_RAM(model)"

In [11]:
# Memory (in MB) reported by check_RAM() for one fit with each backend.
lme4_RAM <- check_RAM("lme4")
nlme_RAM <- check_RAM("nlme")
bam_RAM <- check_RAM("bam")

# Show the three results side by side in a one-row table.
data.frame(
  lme4_RAM = lme4_RAM,
  nlme_RAM = nlme_RAM,
  bam_RAM = bam_RAM
)

lme4_RAM,nlme_RAM,bam_RAM
1740.168,1724.926,1652.27


#### 2.3.2 Z użyciem wbudowanego w R narzędzia profilowania

https://rpubs.com/kamilpytlak/LMM_BIG_2