/
EI-index.R
177 lines (151 loc) · 6.96 KB
/
EI-index.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
http://www.archaeologysouthwest.org/pdf/e-i_index_script.txt
# Feel free to use but please cite:
# Borck, Mills, Peeples, and Clark 2015 Are Social Networks Survival Networks? An Example from the Late Prehispanic U.S. Southwest, Journal of Archaeological Method and Theory 22(1):33-57
####################################################################################################
## Overview ########################################################################################
####################################################################################################
# This script calculates E-I (external-internal) indeces for both similarity matrices or binary
# networks. The output includes standard and normalized E-I scores by site, standard and normalized
# E-I scores by group (region), and the actual and expected E-I value for the dataset as a whole
# based on a permutation test.
#
# This script expects two inputs. The first is a symmetric similarity matrix or binary network
# designated as 'sim' in R. The second is a vector or matrix of group (region) designations for
# every row in the 'sim' matrix defined as 'grp'.
#
####################################################################################################
## Setup ###########################################################################################
####################################################################################################
####### initialize necessary libraries
library(sqldf)
library(statnet)
####################################################################################################
## E-I Index #######################################################################################
####################################################################################################
## Function for calculting the E-I index for all sites in the database
e.i.test <- function(x,y) {
reg <- as.matrix(y) # reg is the regional or group designation
rd <- dim(x)[1]
results <- matrix(0,rd,3)
for (s1 in 1:rd) {
for (s2 in 1:rd) {
x1Temp <- reg[s1,1]
x2Temp <- reg[s2,1]
if (x1Temp==x2Temp) {results[s1,2] <- results[s1,2]+x[s1,s2]}
if (x1Temp!=x2Temp) {results[s1,1] <- results[s1,1]+x[s1,s2]}}}
for (j in 1:nrow(x)){
if (length(which(x<1 & x>0))>0)
{results[j,2] <- results[j,2]-1}
results[j,3] <- (results[j,1]-results[j,2]) / (results[j,1]+results[j,2])}
row.names(results) <- row.names(x)
results <- round(results,3)
out <- as.data.frame(cbind(reg,results))
colnames(out) <- c('Region','E','I','E-I Index')
return(out)}
## Function for calculting normalized version of the E-I index for all sites in the database
e.i.norm <- function(x,y) {
reg <- as.matrix(y)
rd <- dim(x)[1]
results <- matrix(0,rd,3)
for (s1 in 1:rd) {
for (s2 in 1:rd) {
x1Temp <- reg[s1,1]
x2Temp <- reg[s2,1]
if (x1Temp==x2Temp) {results[s1,2] <- results[s1,2]+x[s1,s2]}
if (x1Temp!=x2Temp) {results[s1,1] <- results[s1,1]+x[s1,s2]}}}
if (length(which(x<1 & x>0))>0)
{results[,2] <- results[,2]-1}
results[,1] <- results[,1]/max(results[,1])
results[,2] <- results[,2]/max(results[,2])
results[,3] <- (results[,1]-results[,2]) / (results[,1]+results[,2])
row.names(results) <- row.names(x)
results <- round(results,3)
out <- as.data.frame(cbind(reg,results))
colnames(out) <- c('Region','E','I','E-I Index')
return(out)}
## Function for calculating and standardizing group (region) level E-I index
e.i.group <- function(x,y) {
EIst <- NULL
EI <- NULL
Est <- NULL
Ist <- NULL
out <- sqldf('SELECT x.Region, Sum(x.E) AS E, Sum(x.I) AS I
FROM x
GROUP BY x.Region')
grp2 <- as.matrix(table(y))
grp2 <- as.matrix(grp2[which(grp2[,1]>0),])
for (i in 1:nrow(out)) {
Est[i] <- out[i,2]/(nrow(x)-grp2[i,1])
Ist[i] <- out[i,3]/(grp2[i,1])}
EI <- (out[,2]-out[,3])/(out[,2]+out[,3])
EIst <- (Est-Ist)/(Est+Ist)
EI <- round(EI,3)
Est <- round(Est,3)
Ist <- round(Ist,3)
EIst <- round(EIst,3)
out <- cbind(grp2,out,EI,Est,Ist,EIst)
colnames(out) <- c('NumSites','Region','E','I','E-I Index','E stand','I stand','E-I stand')
return(out)}
####################################################################################################
## E-I Permutation Test ############################################################################
####################################################################################################
## Function for calculating E-I index for n simulations using randomized versions of original input dataset (non-normalized)
## This may take several seconds to minutes depending on the size of the databse and the number of random runs selected
e.i.perm <- function(x,y) {
nsim <- 500 ## number of simulations
E <- NULL
I <- NULL
out <- NULL
EI <- NULL
final <- matrix(0,1,2)
for (i in 1:nsim) {
z <- sample(nrow(x),replace=F)
x2 <- x
x <- x[z,z]
reg <- as.matrix(y)
rd <- dim(x)[1]
results <- matrix(0,rd,3)
for (s1 in 1:rd) {
for (s2 in 1:rd) {
x1Temp <- reg[s1,1]
x2Temp <- reg[s2,1]
if (x1Temp==x2Temp) {results[s1,2] <- results[s1,2]+x[s1,s2]}
if (x1Temp!=x2Temp) {results[s1,1] <- results[s1,1]+x[s1,s2]}}}
if (length(which(x<1 & x>0))>0) {
for (j in 1:nrow(x)){
results[j,2] <- results[j,2]-1}}
E <- sum(results[,1])
I <- sum(results[,2])
out[i] <- (E-I)/(E+I)
out <- round(out,3)}
results2 <- matrix(0,rd,3)
for (s1 in 1:rd) {
for (s2 in 1:rd) {
x1Temp <- reg[s1,1]
x2Temp <- reg[s2,1]
if (x1Temp==x2Temp) {results2[s1,2] <- results2[s1,2]+x2[s1,s2]}
if (x1Temp!=x2Temp) {results2[s1,1] <- results2[s1,1]+x2[s1,s2]}}}
if (length(which(x<1 & x>0))>0)
{for (j in 1:nrow(x)){
results2[j,2] <- results2[j,2]-1}}
E <- sum(results2[,1])
I <- sum(results2[,2])
EI <- (E-I)/(E+I)
final <- cbind(EI,mean(out))
colnames(final) <- c('Actual EI','Expected EI')
return(final)}
####################################################################################################
### Function Calls #################################################################################
####################################################################################################
## sim == symmetric similarity matrix or binary network
## grp == matrix or vector of group (region) designations for every row of sim
# Calculate E-I index by site
site_ei <- e.i.test(sim,grp)
# Calculate normalized E-I index by site
norm_ei <- e.i.norm(sim,grp)
# Calculate and standardize group (region) level E-I index
group_ei <- e.i.group(site_ei,grp)
group_ei_norm <- e.i.group(norm_ei,grp) # use this for normalized scores
# Calculate actual and expected E-I values for dataset based on permutation test
ei_perm <- e.i.perm(sim,grp)
## End of Script