# Reshaping Data

In [2]:
library(reshape2)

In [3]:
# Preview the first six rows of the built-in mtcars data set.
head(mtcars, n = 6L)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225,105,2.76,3.46,20.22,1,0,3,1


In [4]:
# Row names hold the car model; copy them into a regular column so they can
# serve as an id variable when melting.
mtcars$carname <- rownames(mtcars)
# melt() reshapes wide -> long: the id.vars columns are kept on every row and
# each measure.vars column is stacked into a (variable, value) pair.
# `id.vars` is spelled out in full instead of relying on partial matching
# of `id`.
carMelt <- melt(
  mtcars,
  id.vars = c("carname", "gear", "cyl"),
  measure.vars = c("mpg", "hp")
)
head(carMelt, n = 3)
tail(carMelt, n = 3)

carname,gear,cyl,variable,value
Mazda RX4,4,6,mpg,21.0
Mazda RX4 Wag,4,6,mpg,21.0
Datsun 710,4,4,mpg,22.8


Unnamed: 0,carname,gear,cyl,variable,value
62,Ferrari Dino,5,6,hp,175
63,Maserati Bora,5,8,hp,335
64,Volvo 142E,4,4,hp,109


In [5]:
# dcast() reshapes long -> wide: one row per cyl and one column per level of
# `variable` (mpg, hp). Aggregating with length counts how many measurements
# fall into each cyl/variable cell.
cylData <- dcast(carMelt, cyl ~ variable, fun.aggregate = length)
cylData

cyl,mpg,hp
4,11,11
6,7,7
8,14,14


In [6]:
# Cast with mean as the aggregator: each cell holds the average mpg / hp of
# the cars sharing that cyl value.
# SQL equivalent: SELECT cyl, AVG(mpg), AVG(hp) ... GROUP BY cyl
cylData <- dcast(carMelt, cyl ~ variable, fun.aggregate = mean)
cylData

cyl,mpg,hp
4,26.66364,82.63636
6,19.74286,122.28571
8,15.1,209.21429


# Averaging values

In [7]:
# Preview the first six rows of the built-in InsectSprays data set.
head(InsectSprays, n = 6L)

count,spray
10,A
7,A
20,A
14,A
14,A
12,A


In [8]:
# Sum `count` within each level of `spray`: tapply applies FUN to the
# subsets of X defined by INDEX.
tapply(X = InsectSprays$count, INDEX = InsectSprays$spray, FUN = sum)

In [9]:
# Split the counts into a list holding one numeric vector per spray level.
spIns <- split(x = InsectSprays$count, f = InsectSprays$spray)
spIns

In [10]:
# Sum each per-spray vector; lapply always returns a list.
sprCount <- lapply(X = spIns, FUN = sum)

In [11]:
# Flatten the list of sums into a named numeric vector.
unlisted_spr <- unlist(x = sprCount)
unlisted_spr

In [12]:
# The vector is named: every element carries a string name (here the spray
# level), loosely comparable to string keys on a JavaScript object.
# Index by name to read a single element:

unlisted_spr["A"]

In [13]:
# sapply works like lapply but tries to simplify the result; each sum here is
# a single number, so it returns a named vector rather than a list.
sapply(X = spIns, FUN = sum)

In [14]:
library(plyr)

# ddply splits the data frame by spray, applies summarize to each piece and
# combines the pieces into a data frame (one row per spray).

ddplyResult <- ddply(
  InsectSprays,
  .(spray),
  summarize,
  mySum = sum(count)
)
ddplyResult

spray,mySum
A,174
B,184
C,25
D,59
E,42
F,200


In [18]:
# ave() returns a vector as long as its input, so within each spray group the
# group total is repeated for every observation: the result has one row per
# original row rather than one row per spray.
ddplyResult <- ddply(
  InsectSprays,
  .(spray),
  summarize,
  sum = ave(count, FUN = sum)
)
ddplyResult

spray,sum
A,174
A,174
A,174
A,174
A,174
A,174
A,174
A,174
A,174
A,174
