-
Notifications
You must be signed in to change notification settings - Fork 0
/
apply_family
127 lines (110 loc) · 4.44 KB
/
apply_family
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#Author : Rohan M. Nanaware
#Date C.: 07th Aug 2017
#Date M.: 07th Aug 2017
#Purpose: Understand the apply family, syntax, when to use, etc.
#Ref. : https://www.datacamp.com/community/tutorials/r-tutorial-apply-family#family
{
  # 01. apply(): apply a function over the margins of a matrix or array.
  #
  # Syntax (template only; kept as a comment because X, MARGIN and FUN are
  # placeholders and `...` is not valid in a top-level call, so running the
  # line as code aborts this whole block):
  #   apply(X, MARGIN, FUN, ...)
  # where:
  #   X      is an array, or a matrix if the array has 2 dimensions;
  #   MARGIN defines how the function is applied: MARGIN = 1 works over rows,
  #          MARGIN = 2 works over columns, and MARGIN = c(1, 2) applies FUN
  #          to every individual cell of a matrix;
  #   FUN    is the function to apply to the data. It can be any R function,
  #          including a User Defined Function (UDF).

  # Construct a 5x6 matrix of standard-normal draws
  X <- matrix(rnorm(30), nrow = 5, ncol = 6)
  # Sum the values of each column with `apply()`
  apply(X, 2, sum)
  {
    # Sample output from one run (the values are random, so they will differ):
    # > X <- matrix(rnorm(30), nrow=5, ncol=6)
    # > X
    # [,1] [,2] [,3] [,4] [,5] [,6]
    # [1,] -1.94906932 -1.3878370 -0.1958298 -0.53938847 -0.2420091 1.8336326
    # [2,] -0.06625763 -0.3724634 -0.2571309 -0.01875991 -0.7428842 -0.4180169
    # [3,] 0.44975736 0.2005075 -2.3643830 -0.58094495 -1.7548130 -1.0533094
    # [4,] -0.75873543 -0.9859626 0.4494635 -0.71736076 -1.1165637 -0.6567909
    # [5,] -1.13292328 1.3785268 -0.1441591 -0.25306394 -1.4937538 -0.3722877
    # > apply(X, 2, sum)
    # [1] -3.4572283 -1.1672288 -2.5120393 -2.1095180 -5.3500238 -0.6667723
  }#res
  {
    # Column-wise experiments on a data frame.
    # `iris` mixes numeric columns with a factor column. apply() would first
    # coerce the whole data frame to a character matrix, so the columns are
    # visited with lapply()/vapply() instead, which keeps each column's type.
    X <- iris
    head(X)
    # Distinct values per column (list with one element per column)
    Y <- lapply(X, unique)
    # Number of distinct values per column (named integer vector)
    Y <- vapply(X, function(x) length(unique(x)), integer(1))
  }#test
}#01. apply()
{
  # 02. lapply(): apply a function to every element of a list and get a list
  # back. See ?lapply; the syntax resembles apply(). The differences are:
  #   - it also works on other objects such as data frames, lists or vectors;
  #   - the result is always a list (hence the "l" in the name) with as many
  #     elements as the object passed in.

  # Collect three copies of the iris data frame in a list
  A <- B <- C <- iris
  MyList <- list(A, B, C)

  # Extract the 2nd column of every element of `MyList`
  lapply(MyList, function(df) df[, 2])
  # Extract the 1st row of every element of `MyList`
  lapply(MyList, function(df) df[1, ])
  {
    # Scratch experiments with length().
    # NOTE: `X` is not created in this block; it must already exist in the
    # session when this part runs.
    MyList <- list(X, iris, cars)
    # lapply(MyList, function(x) apply(x, 1, length))
    # One result per list element
    lapply(MyList, length)
    # One result per column of the data frame
    lapply(iris, length)
    # One result per value of the first column
    lapply(iris[, 1], length)
  }#test
}#02. lapply()
{
# sapply() is a ‘wrapper’ function for lapply().
# Where lapply() always gives us a list, sapply() simplifies the result to a vector or matrix whenever possible,
# unless you pass simplify=FALSE as parameter to sapply(). Then, a list will be returned
}#03. sapply()
{
  # 04. rep(): replicate the elements of a vector.
  # NOTE: `MyList` is not created in this block; it must already exist in the
  # session when this part runs.

  # Initialize `Z` with the [1, 1] cell of every element of `MyList`
  Z <- sapply(MyList, function(tbl) tbl[1, 1])
  # Return `Z`
  Z
  # Replicate each value of `Z` according to the matching count in
  # c(3, 1, 2): three times the first, one time the second and two times
  # the third
  Z <- rep(Z, times = c(3, 1, 2))
  # Return `Z`
  Z
}#04. rep()
{
  # 05. sweep(): probably the closest relative of the apply() family.
  # Use it to combine the MARGIN elements of an array with a vector of
  # statistics (limited here to the matrix case). A typical scenario is
  # clustering, where data repeatedly has to be centered and normalized,
  # i.e. "standardized".
  dataPoints <- matrix(rnorm(25), nrow = 5, ncol = 5)

  # Per-column mean and standard deviation, both computed with `apply()`
  dataPoints_means <- apply(dataPoints, MARGIN = 2, FUN = mean)
  dataPoints_sdev <- apply(dataPoints, MARGIN = 2, FUN = sd)

  # Center the points: one sweep() call subtracts each column's mean.
  # sweep() expects:
  #   - an input array, which in this case is a matrix;
  #   - a MARGIN, 2 to indicate the columns;
  #   - the summary statistics to sweep out (here the column means); and
  #   - the function to apply; "-" is the arithmetic operator for subtraction.
  dataPoints_Trans1 <- sweep(
    dataPoints,
    MARGIN = 2,
    STATS = dataPoints_means,
    FUN = "-"
  )
  print(dataPoints_Trans1)
  # Return the result
  dataPoints_Trans1

  # Normalize: divide each centered column by its standard deviation
  dataPoints_Trans2 <- sweep(
    dataPoints_Trans1,
    MARGIN = 2,
    STATS = dataPoints_sdev,
    FUN = "/"
  )
  # Return the result
  dataPoints_Trans2
  {
    # Scratch experiment: multiply column j by j
    sweep(dataPoints, MARGIN = 2, STATS = as.numeric(1:5), FUN = "*")
  }#test
}#05. sweep()
{
  # Pending: sections still to be written.
  # The two functions are listed as comments rather than bare calls because
  # both have required arguments; `aggregate()` and `mapply()` called with no
  # arguments stop with an error instead of acting as placeholders.
  # TODO: aggregate()
  # TODO: mapply()
}#Pending