Skip to content

Commit

Permalink
Disparity
Browse files Browse the repository at this point in the history
  • Loading branch information
mguevara committed Sep 15, 2015
1 parent 177cab5 commit 1b35828
Show file tree
Hide file tree
Showing 12 changed files with 260 additions and 77 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
@@ -0,0 +1,2 @@
^.*\.Rproj$
^\.Rproj\.user$
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
.Rproj.user
.Rhistory
.RData
22 changes: 14 additions & 8 deletions DESCRIPTION
@@ -1,11 +1,17 @@
Package: diversity
Title: Diversity Measures Package
Version: 0.1
Authors: Miguel Guevara <miguel.guevara@upla.cl>, Dominik Hartmann
Package: diveR
Title: An R Package to Compute Diversity Measures
Version: 0.4
Author: Miguel R. Guevara <miguel.guevara@upla.cl>, Dominik Hartmann
<d.hartmann@uni-hohenheim.de>, Marcelo Mendoza
<marcelo.mendoza@usm.cl>
Description: It implements a number of diversity measures for data analysis
Depends: proxy
License: GPL
Maintainer: Miguel R. Guevara <miguel.guevara@upla.cl>
Description: It implements a number of diversity measures for data analysis considering three dimensions: Variety, Balance and Disparity.
Depends: proxy,pheatmap,reshape2,foreign
URL: https://github.com/mguevara/diveR
Repository: CRAN
Date: 2015-09-18 16:34:12
BugReports: https://github.com/mguevara/diveR/issues
License: MIT
LazyData: true
NeedsCompilation: no
Packaged: 2015-08-17 21:26:31 UTC; mmendoza
Packaged: 2015-09-13 21:26:31 UTC; mmendoza
213 changes: 149 additions & 64 deletions R/diversity.R
Expand Up @@ -105,7 +105,7 @@ diversity <- function(data, type="all", method='euclidean', agg_type=NULL, q=0,
diversity <- merge(diversity,m_d, by=0, all=TRUE)
rownames(diversity) <- diversity$Row.names; diversity$Row.names <- NULL
}
if(type == 'inverse-simpson' || type=='inv' || type == 'all') {
if(type == 'inverse-simpson' || type=='is' || type == 'all') {
q <- 2
p <- 1/(1-q)
m_d <- as.data.frame((rowSums(propX ^ q, na.rm=TRUE)) ^ p)
Expand All @@ -126,24 +126,47 @@ diversity <- function(data, type="all", method='euclidean', agg_type=NULL, q=0,
diversity <- merge(diversity,m_d, by=0, all=TRUE)
rownames(diversity) <- diversity$Row.names; diversity$Row.names <- NULL
}
if(!is.null(method)) {
if (method == 'jaccard') {
disX <- as.matrix(dist(t(X), method="Jaccard"), diag=1)

}
else if (method == 'euclidean') {
disX <- as.matrix(dist(t(X), method="euclidean"), diag=1)
}
else if (method == "cosine") {
disX <- as.matrix(dist(t(X), method="cosine"), diag=1)
}
}
else {
disX <- as.matrix(dist(t(X), method="euclidean"), diag=1)
}
if(type == 'rao-stirling' || type=='rs' || type == 'all') {
m_d <- as.data.frame(rowSums(propX^beta %*% disX^alpha * propX^beta))
colnames(m_d) <- c('rao-stirling')
disX <- distances(X, agg_type = agg_type, method=method) #compute distances first
disX_mask <- disX
disX_mask[ (!is.na(disX_mask))] <- 1
disX_mask[lower.tri(disX_mask)] <- 0
diag(disX_mask) <- 0
disX_mask[is.na(disX_mask)] <- 0

#print(propX)
N <- ncol(propX)
m_d <- data.frame(row.names = rownames(propX))
str(m_d)
m_d[,'rao-stirling'] <- NA

for(entity in row.names(propX)) #go into each entity
{
entity_data <- propX[entity,]
prop_i <- matrix(entity_data, ,nrow=N,ncol=N, byrow=TRUE)
prop_j <- matrix(entity_data, ,nrow=N,ncol=N, byrow=FALSE)
p_ij <- prop_i * prop_j
p_ij_mask <- p_ij
#p_ij_mask[p_ij_mask != 0] <- 1
p_ij_mask[upper.tri(p_ij_mask)] <- 1
diag(p_ij_mask) <- 0
p_ij_mask[lower.tri(p_ij_mask)] <- 0
rs_entity <- ((disX^alpha)*disX_mask) * ((p_ij^beta)*p_ij_mask) #deleting proportions or distances inexistent that might become 1, because of potentia of zero.
m_d[entity, 'rao-stirling'] <- sum(rs_entity)
print(entity)
print(p_ij)
print(p_ij_mask)
print((p_ij^beta)*p_ij_mask)
}
#print(rs_entity)
#print((p_ij^beta)*p_ij_mask)
print(propX)
print(disX)
print(disX_mask)

#print((disX^alpha)*disX_mask)
#m_d <- as.data.frame(rowSums(propX^beta %*% disX^alpha * propX^beta))

diversity <- merge(diversity,m_d, by=0, all=TRUE)
rownames(diversity) <- diversity$Row.names; diversity$Row.names <- NULL
}
Expand All @@ -156,6 +179,8 @@ diversity <- function(data, type="all", method='euclidean', agg_type=NULL, q=0,
return(diversity)
}



#' @title Get Data
#' @description It takes data as dataframe (edges) or as matrix (table) to be exported in proper form to be used by the diversity function.
#' @param data Data to be processed as dataframe or as matrix.
Expand All @@ -168,6 +193,7 @@ get_data <- function(data, agg_type)
if (is.data.frame(data)) {
if(!is.null(agg_type)) {
#diversity <- data.frame(row.names=levels(data[,2]))
data <- droplevels(data) #delete un used levels
X <- matrix(0, nrow=nlevels(data[,2]), ncol=nlevels(data[,1]), dimnames=list(levels(data[,2]),levels(data[,1])))
X[cbind(data[,2], data[,1])] <- data[,3]
}
Expand All @@ -191,14 +217,34 @@ get_data <- function(data, agg_type)

}


#' @title Variety
#' @description It computes the variety or simple diversity of a system. Number of types
#' @param data Data to be processed as dataframe or as matrix.
#' @examples
#' vari <- varity(data=d)
#' @return a dataframe with values of variety
#' @export
variety <- function(data, sort=TRUE)
{
vari <- diversity(data, type='v')
if(sort != FALSE)
{
vari['category'] <- row.names(vari)
vari <- vari[order(vari$variety, decreasing = TRUE), ]
vari['category'] <- NULL
}

return(vari)
}

#' @title Ubiquity
#' @description It computes the ubiquity or the rearnes of the categories
#' @param data Data to be processed as dataframe or as matrix.
#' @param data_agg Diferent of NULL if column analysis is needed.
#' @examples
#' ub <- ubiquity(data=d)
#' @return a dataframe with values of frequency per category. Decreasing order
#' @export
ubiquity <- function(data)
{
ubiq <- diversity(data, type='v', method='euclidean' , agg_type='col')
Expand Down Expand Up @@ -250,22 +296,48 @@ readEdges <- function(path,sepr,we=TRUE){
return(X)
}

#' @title A procedure to read a data frame from a csv file
#' @description It takes a file and creates a data frame for diversity analysis
#' @param path A string representing the path to a csv file. Rows and columns are described by strings (Ex.: "asind" "0010" 216000).
#' @param sepr Separator field used in the file to separate columns
#' @param we It indicates if the list of edges includes weights or not. Default is TRUE
#' @return A data frame with objects as rows and categories as cols
#' @title A procedure to read data of a data file in formats csv, dta or spss
#' @description It reads a file with data shaped as a matrix or as edges list. Several types of formats are allowed.
#' @param path A string representing the path to data file. If it is shpaed as a matrix, first column must include proper names of the categories. If it is shaped as edges list, it must contain three columns, these are entity, category, value.
#' @param sep Separator field used in the file to separate columns, if it is a CSV file. Default value is comma.
#' @param type It indicates the type of data to be read. This parameter facilitate the input of diverse type of data files, as spss or stata Posible options are the names of the mentioned softwares. Default value is csv.
#' @return A data frame with three columns, even when the input file is shaped as a matrix.
#' @examples
#' path <- "~/MyDiversity/data/sitc_cnt_62.csv"
#' sepr <- ' '
#' we <- TRUE
#' data <- readCSV(path,sepr,we)
readCSV <- function(path,sepr,we=TRUE){
data <- data.frame()
#if (!we) col = 2 else col = 3
col_classes <- c('factor', 'factor', NA)
data <- rbind(data, read.csv(path, sep=sepr, colClasses=col_classes))
#' path <- path_to_matrix_file <- system.file("extdata", "PantheonMatrix.csv", package = "diveR")
#' sep <- ','
#' data <- read.data(path)
#' path <- path_to_matrix_file <- system.file("extdata", "PantheonEdges.csv", package = "diveR")
#' data <- read.data(path)
read.data <- function(path, type='csv',sep=','){

if(type=='csv')
{
data_temp <- read.csv(path, sep=sep, nrow=1)
if(ncol(data_temp)>3)
{#matrix shape
data <- read.csv(path,sep=sep)
data <- melt(data, na.rm = TRUE)
}
else
{
col_classes <- c('factor', 'factor', NA)
data <- read.csv(path, sep=sep, colClasses=col_classes)
}

}
if(type=='spss')
{
data <- read.spss(path, use.value.labels = FALSE)
}
if(type=='stata')
{
data <- read.dta(path)
}
if(ncol(data)> 3)
{
data <- melt(data, na.rm=TRUE)
}
row.names(data) <- NULL
return(data)
}

Expand All @@ -290,40 +362,19 @@ writeCSV <- function(frame, path, sepr) {
#' @examples
#' Xdis <- dist_mat(data)
#' Xdis <- dist_mat(data, method="jaccard", agg_type='col')
dist_mat <- function(data, method=NULL, agg_type=NULL){
if (is.data.frame(data)) {
if(is.null(agg_type)) {
X <- matrix(0, nrow=nlevels(data[,2]), ncol=nlevels(data[,1]), dimnames=list(levels(data[,2]),levels(data[,1])))
X[cbind(data[,2], data[,1])] <- data[,3]
}
else {
X <- matrix(0, nrow=nlevels(data[,1]), ncol=nlevels(data[,2]), dimnames=list(levels(data[,1]),levels(data[,2])))
X[cbind(data[,1], data[,2])] <- data[,3]
}
}
else {
if (!is.null(agg_type)) {
X <- t(data)
}
else {
X <- data
}
}
if(!is.null(method)) {
distances <- function(data, method='cosine', agg_type=NULL){
X <- get_data(data=data, agg_type=agg_type)

if (method == 'jaccard') {
disX <- as.matrix(dist(t(X), method="Jaccard"), diag=1)

disX <- as.matrix(dist(t(X), method="Jaccard"), diag=1)
}
else if (method == 'euclidean') {
if (method == 'euclidean') {
disX <- as.matrix(dist(t(X), method="euclidean"), diag=1)
}
else if (method == "cosine") {
if (method == "cosine") {
disX <- as.matrix(dist(t(X), method="cosine"), diag=1)
}
}
else {
disX <- as.matrix(dist(t(X), method="cosine"), diag=1)
}

return(disX)
}

Expand All @@ -337,7 +388,7 @@ dist_mat <- function(data, method=NULL, agg_type=NULL){
#' Xdis <- dist_mat(data, method="euclidean")
#' disp <- disparity(Xdis)
#' disp <- disparity(Xdis, method="avg", agg_type='col')
disparity <- function(X, method="all", agg_type="row") {
disparity <- function(X, method="all", agg_type="col") {
if (agg_type == "col") {
X <- t(X)
disparity <- data.frame(row.names=rownames(X))
Expand All @@ -358,4 +409,38 @@ disparity <- function(X, method="all", agg_type="row") {
rownames(disparity) <- disparity$Row.names; disparity$Row.names <- NULL
}
return(disparity)
}
}

#' @title Balance
#' @description A procedure to compute several measures of the dimension balance of the diversity.
#' @param data A matrix of data with row and column names. Or a dataframe with three columns entity, category and value
#' @examples
#' balance(data)
balance <- function(data )
{
balance <- diversity(data, type='entropy') #first balance measure
measures <- c( 'gini','simpson', 'berger-parker', 'inverse-simpson', 'evenness' )
for(measure in measures)
{
m_b <- diversity(data, type=measure)
balance <- merge(balance,m_b, by=0, all=TRUE)
rownames(balance) <- balance$Row.names; balance$Row.names <- NULL
}

return(balance)
}



#' @title A procedure to plot the matrix of data as a pheatmap
#' @description It takes a matrix of data and plots a pheatmap of that matrix
#' @param data A matrix of data to be ploted
#' @param fontsize The size of the font used in the plot.
#' @param fontsize_row The font size of the labels in the rows
#' @examples
#' pheatmap(data)
plot.pheatmap <- function(data,title=NULL, fontsize=14, fontsize_row=7, color = c('blue','yellow'))
{
pheatmap(data, cluster_rows=FALSE, cluster_cols=FALSE,main=paste(title),show_colnames=FALSE,legend=FALSE,fontsize = fontsize,fontsize_row=7, color=color)

}
17 changes: 17 additions & 0 deletions diveR.Rproj
@@ -0,0 +1,17 @@
Version: 1.0

RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: No
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
1 change: 1 addition & 0 deletions inst/extdata/PantheonEdges.csv
@@ -0,0 +1 @@
Country,Occupation,ExportVietnam,Politician,6New Zealand,Politician,2Latvia,Politician,8Uruguay,Politician,5Chile,Politician,10South Africa,Politician,9Saudi Arabia,Politician,17Portugal,Politician,36China,Politician,38Canada,Politician,10South Africa,Religious Figure,1Saudi Arabia,Religious Figure,11Portugal,Religious Figure,2China,Religious Figure,2Portugal,Nobleman,2China,Military Person,4Latvia,Philosopher,1Saudi Arabia,Philosopher,1China,Philosopher,10Canada,Philosopher,1New Zealand,Writer,1Latvia,Writer,2Uruguay,Writer,3Chile,Writer,5South Africa,Writer,4Portugal,Writer,4China,Writer,12Canada,Writer,7China,Historian,1Vietnam,Journalist,1South Africa,Journalist,1Latvia,Soccer Player,1Uruguay,Soccer Player,10Chile,Soccer Player,5South Africa,Soccer Player,4Portugal,Soccer Player,26Canada,Soccer Player,1Uruguay,Referee,1Chile,Coach,1Portugal,Coach,2Chile,Tennis Player,3China,Tennis Player,1Canada,Tennis Player,1South Africa,Basketball Player,1China,Basketball Player,1Canada,Hocker Player,1New Zealand,Racecar Driver,2South Africa,Racecar Driver,1Canada,Racecar Driver,2China,Martial Arts,1New Zealand,Mountaineer,1Canada,Wrestler,4South Africa,Athlete,1Canada,Athlete,1Latvia,Chess Master,1Chile,Biologist,1South Africa,Biologist,2Canada,Biologist,1New Zealand,Chemist,2Latvia,Chemist,1China,Chemist,1Canada,Chemist,4New Zealand,Physicist,1South Africa,Physicist,1China,Physicist,4Canada,Physicist,3Saudi Arabia,Astronomer,1South Africa,Physician,1Saudi Arabia,Physician,1Portugal,Physician,2Canada,Physician,3Canada,Phychologist,2Canada,Economist,4China,Political Scientist,1China,Engineer,1China,Mathematician,1Canada,Computer Scientist,1China,Inventor,2Canada,Inventor,1New Zealand,Actor,3South Africa,Actor,2China,Actor,5Canada,Actor,37China,Musician,1Canada,Musician,8New Zealand,Singer,1Chile,Singer,1South Africa,Singer,1Portugal,Singer,2Canada,Singer,15Canada,Composer,2Latvia,Sculptor,1Latvia,Artist,1China,Artist,1Portugal,Architect,1China,Architect,1Canada,Architect,1Canada,Comic Artist,1New Zealand,Film Director,1Latvia,Film Director,1China,Film Director,3Canada,Film Director,2Latvia,Dancer,1South Africa,Social Activist,3China,Social Activist,4Vietnam,Model,2South Africa,Model,1Canada,Model,1Canada,Magician,1Vietnam,Presenter,2Canada,Pornographic Actor,2South Africa,Businessperson,1China,Businessperson,1Portugal,Explorer,6China,Explorer,2China,Astronaut,1Saudi Arabia,Companion,3Portugal,Companion,1Saudi Arabia,Extremist,1
Expand Down
1 change: 1 addition & 0 deletions inst/extdata/PantheonMatrix.csv
@@ -0,0 +1 @@
Country,Politician,Religious Figure,Nobleman,Military Person,Philosopher,Writer,Historian,Journalist,Soccer Player,Referee,Coach,Tennis Player,Basketball Player,Hocker Player,Racecar Driver,Martial Arts,Mountaineer,Wrestler,Athlete,Chess Master,Biologist,Chemist,Physicist,Astronomer,Physician,Phychologist,Economist,Political Scientist,Engineer,Mathematician,Computer Scientist,Inventor,Actor,Musician,Singer,Composer,Sculptor,Artist,Architect,Comic Artist,Film Director,Dancer,Social Activist,Model,Magician,Presenter,Pornographic Actor,Businessperson,Explorer,Astronaut,Companion,ExtremistVietnam,6,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,,2,,,,,,New Zealand,2,,,,,1,,,,,,,,,2,,1,,,,,2,1,,,,,,,,,,3,,1,,,,,,1,,,,,,,,,,,Latvia,8,,,,1,2,,,1,,,,,,,,,,,1,,1,,,,,,,,,,,,,,,1,1,,,1,1,,,,,,,,,,Uruguay,5,,,,,3,,,10,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Chile,10,,,,,5,,,5,,1,3,,,,,,,,,1,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,South Africa,9,1,,,,4,,1,4,,,,1,,1,,,,1,,2,,1,,1,,,,,,,,2,,1,,,,,,,,3,1,,,,1,,,,Saudi Arabia,17,11,,,1,,,,,,,,,,,,,,,,,,,1,1,,,,,,,,,,,,,,,,,,,,,,,,,,3,1Portugal,36,2,2,,,4,,,26,,2,,,,,,,,,,,,,,2,,,,,,,,,,2,,,,1,,,,,,,,,,6,,1,China,38,2,,4,10,12,1,,,,,1,1,,,1,,,,,,1,4,,,,,1,1,1,,2,5,1,,,,1,1,,3,,4,,,,,1,2,1,,Canada,10,,,,1,7,,,1,,,1,,1,2,,,4,1,,1,4,3,,3,2,4,,,,1,1,37,8,15,2,,,1,1,2,,,1,1,,2,,,,,
Expand Down
4 changes: 2 additions & 2 deletions man/diversity.Rd
Expand Up @@ -4,8 +4,8 @@
\alias{diversity}
\title{Diversity measures}
\usage{
diversity(data, type = "all", method = NULL, agg_type = NULL, q = 0,
alpha = 1, beta = 1)
diversity(data, type = "all", method = "euclidean", agg_type = NULL,
q = 0, alpha = 1, beta = 1)
}
\arguments{
\item{data}{A numeric matrix or data frame with objects as rows and categories as columns}
Expand Down
20 changes: 20 additions & 0 deletions man/get_data.Rd
@@ -0,0 +1,20 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/diversity.R
\name{get_data}
\alias{get_data}
\title{Get Data}
\usage{
get_data(data, agg_type)
}
\arguments{
\item{data}{Data to be processed as dataframe or as matrix.}

\item{data_agg}{Diferent of NULL if column analysis is needed.}
}
\description{
It takes data as dataframe (edges) or as matrix (table) to be exported in proper form to be used by the diversity function.
}
\examples{
X <- get_data(data=d, agg_type=NULL)
}

0 comments on commit 1b35828

Please sign in to comment.