学习一些R语言高级编程技术。

# 面向对象的编程

In [1]:
summary(women)

     height         weight     
 Min.   :58.0   Min.   :115.0  
 1st Qu.:61.5   1st Qu.:124.5  
 Median :65.0   Median :135.0  
 Mean   :65.0   Mean   :136.7  
 3rd Qu.:68.5   3rd Qu.:148.0  
 Max.   :72.0   Max.   :164.0  

In [2]:
fit <- lm(weight~height, data=women)
summary(fit)


Call:
lm(formula = weight ~ height, data = women)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.7333 -1.1333 -0.3833  0.7417  3.1167 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -87.51667    5.93694  -14.74 1.71e-09 ***
height        3.45000    0.09114   37.85 1.09e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.525 on 13 degrees of freedom
Multiple R-squared:  0.991,	Adjusted R-squared:  0.9903 
F-statistic:  1433 on 1 and 13 DF,  p-value: 1.091e-14


In [3]:
summary

In [4]:
class(women)

In [5]:
class(fit)

In [6]:
methods(summary)

 [1] summary.aov                         summary.aovlist*                   
 [3] summary.aspell*                     summary.check_packages_in_dir*     
 [5] summary.connection                  summary.data.frame                 
 [7] summary.Date                        summary.default                    
 [9] summary.ecdf*                       summary.factor                     
[11] summary.glm                         summary.infl*                      
[13] summary.lm                          summary.loess*                     
[15] summary.manova                      summary.matrix                     
[17] summary.mlm*                        summary.nls*                       
[19] summary.packageStatus*              summary.POSIXct                    
[21] summary.POSIXlt                     summary.ppr*                       
[23] summary.prcomp*                     summary.princomp*                  
[25] summary.proc_time                   summary.rlang:::list_of_conditions*

In [1]:
# 定义泛型函数
mymethod <- function(x, ...) UseMethod("mymethod")
mymethod.default <- function(x, ...) print("default method")
mymethod.a <- function(x, ...) print("method for class a")
mymethod.b <- function(x, ...) print("method for class b")

# 给对象分配类
x <- 1:5
y <- 6:10
z <- 11:15
class(x) <- "a"
class(y) <- "b"

# 把泛型函数应用到对象上
mymethod(x)
mymethod(y)
mymethod(z)

# 将泛型函数应用到包含两个类的对象上
class(z) <- c("a", "b")
mymethod(z)

# 泛型函数没有默认为"c"的类
class <- c("c", "a", "b")
mymethod(z)

[1] "method for class a"
[1] "method for class b"
[1] "default method"
[1] "method for class a"
[1] "method for class a"


In [2]:
class(women) <- "lm"
summary(women)

ERROR: Error in if (p == 0) {: argument is of length zero


# 编写有效的代码

In [10]:
set.seed(1234)
mymatrix <- matrix(rnorm(10000000), ncol=10)

In [12]:
accum <- function(x){
    sums <- numeric(ncol(x))
    for (i in 1:col(x)){
        for (j in 1:nrow(x)){
            sums[i] <- sums[i] + x[j, i]
        }
    }
}

In [13]:
print(system.time(accum(mymatrix)))
print(system.time(colSums(mymatrix)))

"numerical expression has 10000000 elements: only the first used"


   user  system elapsed 
   0.03    0.00    0.08 
   user  system elapsed 
   0.00    0.00    0.01 


In [2]:
set.seed(1234)
k <- 100000
x <- rnorm(k)

In [3]:
y <- 0
system.time(
    for (i in 1:length(x)) y[i]  <- x[i]^2
)

   user  system elapsed 
   0.00    0.00    0.02 

In [5]:
y <- numeric(length=k)
system.time(
    for (i in 1:k) y[i]  <- x[i]^2
)

   user  system elapsed 
   0.00    0.00    0.02 

In [4]:
y <- numeric(length=k)
system.time(y <- x^2)

   user  system elapsed 
      0       0       0 

In [10]:
install.packages("doParallel")

package 'doParallel' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\86151\AppData\Local\Temp\RtmpUN9I8M\downloaded_packages


In [13]:
# 加载包并登记内核数量
library(foreach)
library(doParallel)
registerDoParallel(cores=6)

# 定义函数
eig <- function(n, p){
    x <- matrix(rnorm(100000), ncol=100)
    r <- cor(x)
    eigen(r)$values
}

# 赋值
n <- 1000000
p <- 100
k <- 500

# 正常执行
system.time(
    x <- foreach(i=1:k, .combine=rbind) %do% eig(n, p)
)

# 并行执行
system.time(
    x <- foreach(i=1:k, .combine=rbind) %dopar% eig(n, p)
)

   user  system elapsed 
   2.48    0.20    5.82 

   user  system elapsed 
   0.00    0.00    1.55 

# 调试

In [14]:
mtcars$Transmission  <- factor(mtcars$a, levels=c(1, 2), labels=c("auto", "manual"))
aov(mpg~Transmission, data=mtcars)

ERROR: Error in `contrasts<-`(`*tmp*`, value = contr.funs[1 + isOF[nn]]): contrasts can be applied only to factors with 2 or more levels


In [15]:
head(mtcars[c("mpg", "Transmission")])
table(mtcars$Transmission)

Unnamed: 0_level_0,mpg,Transmission
Unnamed: 0_level_1,<dbl>,<fct>
Mazda RX4,21.0,auto
Mazda RX4 Wag,21.0,auto
Datsun 710,22.8,auto
Hornet 4 Drive,21.4,
Hornet Sportabout,18.7,
Valiant,18.1,



  auto manual 
    13      0 

In [16]:
args(mad)

In [17]:
# 设置调试参数
debug(mad)
mad(1:10)

debugging in: mad(1:10)
debug: {
    if (na.rm) 
        x <- x[!is.na(x)]
    n <- length(x)
    constant * if ((low || high) && n%%2 == 0) {
        if (low && high) 
            stop("'low' and 'high' cannot be both TRUE")
        n2 <- n%/%2 + as.integer(high)
        sort(abs(x - center), partial = n2)[n2]
    }
    else median(abs(x - center))
}
debug: if (na.rm) x <- x[!is.na(x)]
debug: n <- length(x)
debug: constant * if ((low || high) && n%%2 == 0) {
    if (low && high) 
        stop("'low' and 'high' cannot be both TRUE")
    n2 <- n%/%2 + as.integer(high)
    sort(abs(x - center), partial = n2)[n2]
} else median(abs(x - center))
debug: median(abs(x - center))
exiting from: mad(1:10)


ERROR: Error: object 'Browse' not found
