# Reshaping Data

In [1]:
library(reshape2)

In [2]:
# Preview the first six rows of the built-in mtcars data set.
head(mtcars, n = 6L)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225,105,2.76,3.46,20.22,1,0,3,1


In [9]:
# Melting a data set

# melt() reshapes the wide mtcars table into long format. The id columns
# (carname, gear, cyl) are repeated on every row, and each measured column
# (mpg, hp) contributes its own set of rows:
#   - `variable` holds the NAME of the measured column ("mpg" or "hp")
#   - `value` holds the number taken from that column
# Two measured columns means two output rows per car, which is why the
# melted data set has twice as many rows as the original.

# Keep the car names as a regular column so they can be used as an id.
mtcars$carname <- rownames(mtcars)
head(mtcars, n = 3)
nrow(mtcars)

# `id.vars` is spelled out in full rather than relying on partial
# matching of `id`.
carMelt <- melt(
  mtcars,
  id.vars = c("carname", "gear", "cyl"),
  measure.vars = c("mpg", "hp")
)
head(carMelt, n = 3)
tail(carMelt, n = 3)
nrow(carMelt)


Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb,carname
Mazda RX4,21.0,6,160,110,3.9,2.62,16.46,0,1,4,4,Mazda RX4
Mazda RX4 Wag,21.0,6,160,110,3.9,2.875,17.02,0,1,4,4,Mazda RX4 Wag
Datsun 710,22.8,4,108,93,3.85,2.32,18.61,1,1,4,1,Datsun 710


carname,gear,cyl,variable,value
Mazda RX4,4,6,mpg,21.0
Mazda RX4 Wag,4,6,mpg,21.0
Datsun 710,4,4,mpg,22.8


Unnamed: 0,carname,gear,cyl,variable,value
62,Ferrari Dino,5,6,hp,175
63,Maserati Bora,5,8,hp,335
64,Volvo 142E,4,4,hp,109


In [14]:
# Casting the melted data back to wide format

# No aggregation function is given here, so dcast() falls back to length
# (and reports that in a message): each cell is the number of mpg / hp
# measurements recorded for that cyl value.
cylData <- dcast(carMelt, cyl ~ variable)
cylData

# Passing mean as the aggregation function gives the average of each
# measured variable (mpg, hp) grouped by cyl.
cylData <- dcast(
  carMelt,
  cyl ~ variable,
  fun.aggregate = mean
)
cylData

Aggregation function missing: defaulting to length


cyl,mpg,hp
4,11,11
6,7,7
8,14,14


cyl,mpg,hp
4,26.66364,82.63636
6,19.74286,122.28571
8,15.1,209.21429


In [16]:
# InsectSprays has a count column and a spray column;
# spray takes one of the values A, B, C, D, E or F.
head(InsectSprays, n = 6L)

count,spray
10,A
7,A
20,A
14,A
14,A
12,A


In [21]:
# Total insect count for each spray type: tapply() groups `count`
# by `spray` and applies sum to every group.
print(with(InsectSprays, tapply(count, spray, sum)))

  A   B   C   D   E   F 
174 184  25  59  42 200 


In [25]:
# The same per-spray totals, built step by step with split + lapply/sapply.

# split() produces a list with one vector of counts per spray type.
spIns <- with(InsectSprays, split(count, spray))
spIns

# lapply() keeps the list shape: one summed value per list element.
sprCount <- lapply(spIns, sum)
sprCount

# sapply() does the same work but simplifies the result to a named vector.
sprCount <- sapply(spIns, sum)
sprCount


In [31]:
library(plyr)

In [32]:
# Per-spray totals once more, this time with plyr: split by spray,
# then summarize each piece into a single `sum` value.
ddply(
  .data = InsectSprays,
  .variables = .(spray),
  .fun = summarize,
  sum = sum(count)
)

spray,sum
A,174
B,184
C,25
D,59
E,42
F,200


In [36]:
# Creating a new variable that carries the group total on every row.
# ave(count, FUN = sum) returns one value per input element, so within each
# spray group every row receives that group's sum instead of the group
# being collapsed to a single row.
dim(InsectSprays)
spraySums <- ddply(
  .data = InsectSprays,
  .variables = .(spray),
  .fun = summarize,
  sum = ave(count, FUN = sum)
)
dim(spraySums)
head(spraySums)

spray,sum
A,174
A,174
A,174
A,174
A,174
A,174
