## Working with DataFrames

### Loading a CSV into a DataFrame

In [1]:
# Load the UCI "adult-stretch" balloons data set. The file has no header
# row, so the columns receive the default names V1..V5.
# Use FALSE rather than F: F is an ordinary variable that can be reassigned.
balloons <- read.csv(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/balloons/adult-stretch.data",
  header = FALSE
)

### `head`

In [2]:
# Preview the first rows (head() shows six by default).
head(balloons)

V1,V2,V3,V4,V5
YELLOW,SMALL,STRETCH,ADULT,True
YELLOW,SMALL,STRETCH,CHILD,True
YELLOW,SMALL,DIP,ADULT,True
YELLOW,SMALL,DIP,CHILD,False
YELLOW,SMALL,DIP,CHILD,False
YELLOW,LARGE,STRETCH,ADULT,True


### `colnames`

In [3]:
# Replace the default V1..V5 names with descriptive column names.
colnames(balloons) <- c(
  "color", "size", "act", "age", "inflated"
)

In [4]:
# Print the column names to confirm the assignment.
colnames(balloons)

In [5]:
# Preview again: same rows, now under the descriptive column names.
head(balloons)

color,size,act,age,inflated
YELLOW,SMALL,STRETCH,ADULT,True
YELLOW,SMALL,STRETCH,CHILD,True
YELLOW,SMALL,DIP,ADULT,True
YELLOW,SMALL,DIP,CHILD,False
YELLOW,SMALL,DIP,CHILD,False
YELLOW,LARGE,STRETCH,ADULT,True


### Data Dimensions

In [6]:
# dim() returns the number of rows followed by the number of columns.
dim(balloons)

### `rownames`


In [7]:
# Label every row balloon_1, balloon_2, ...
# nrow() states the intent directly, seq_len() avoids the 1:0 == c(1, 0)
# pitfall, and paste0() replaces paste(..., sep = "").
n <- nrow(balloons)
rownames(balloons) <- paste0("balloon_", seq_len(n))

In [8]:
# Preview again: the balloon_<i> labels now identify each row.
head(balloons)

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True
balloon_2,YELLOW,SMALL,STRETCH,CHILD,True
balloon_3,YELLOW,SMALL,DIP,ADULT,True
balloon_4,YELLOW,SMALL,DIP,CHILD,False
balloon_5,YELLOW,SMALL,DIP,CHILD,False
balloon_6,YELLOW,LARGE,STRETCH,ADULT,True


### Columns are Vectors

In [9]:
# `$` extracts a single column as a vector.
balloons$color

In [10]:
# Matrix-style indexing: all rows, one column. A single column is
# dropped to a vector by default.
balloons[, "color"]

#### Columns accessed by name with single brackets are filtered DataFrames

In [13]:
# Single-bracket indexing without a comma keeps the data frame
# structure: the result is a one-column data frame.
balloons["color"]

Unnamed: 0,color
balloon_1,YELLOW
balloon_2,YELLOW
balloon_3,YELLOW
balloon_4,YELLOW
balloon_5,YELLOW
balloon_6,YELLOW
balloon_7,YELLOW
balloon_8,YELLOW
balloon_9,YELLOW
balloon_10,YELLOW


In [14]:
# Select several columns at once; the result is still a data frame.
balloons[c("size", "act")]

Unnamed: 0,size,act
balloon_1,SMALL,STRETCH
balloon_2,SMALL,STRETCH
balloon_3,SMALL,DIP
balloon_4,SMALL,DIP
balloon_5,SMALL,DIP
balloon_6,LARGE,STRETCH
balloon_7,LARGE,STRETCH
balloon_8,LARGE,DIP
balloon_9,LARGE,DIP
balloon_10,LARGE,DIP


## Rows are filtered DataFrames

In [15]:
# Select one row by its row name; the empty slot after the comma
# keeps every column.
balloons["balloon_1", ]

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True


In [16]:
# Select the first three rows by building their row names.
# paste0() is the idiomatic form of paste(..., sep = "").
balloons[paste0("balloon_", 1:3), ]

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True
balloon_2,YELLOW,SMALL,STRETCH,CHILD,True
balloon_3,YELLOW,SMALL,DIP,ADULT,True


In [17]:
# Logical mask over the row names. `.` is a regex wildcard, so the
# pattern matches "_1" followed by any one character.
grepl(pattern = "_1.", x = rownames(balloons))

In [21]:
# Keep the rows whose name matches "_1" followed by any one character
# (here balloon_10 through balloon_19).
balloons[grepl(pattern = "_1.", x = rownames(balloons)), ]

Unnamed: 0,color,size,act,age,inflated
balloon_10,YELLOW,LARGE,DIP,CHILD,False
balloon_11,PURPLE,SMALL,STRETCH,ADULT,True
balloon_12,PURPLE,SMALL,STRETCH,CHILD,True
balloon_13,PURPLE,SMALL,DIP,ADULT,True
balloon_14,PURPLE,SMALL,DIP,CHILD,False
balloon_15,PURPLE,SMALL,DIP,CHILD,False
balloon_16,PURPLE,LARGE,STRETCH,ADULT,True
balloon_17,PURPLE,LARGE,STRETCH,CHILD,True
balloon_18,PURPLE,LARGE,DIP,ADULT,True
balloon_19,PURPLE,LARGE,DIP,CHILD,False


In [22]:
# Filter rows with a logical condition on a column: purple balloons.
balloons[balloons$color == "PURPLE", ]

Unnamed: 0,color,size,act,age,inflated
balloon_11,PURPLE,SMALL,STRETCH,ADULT,True
balloon_12,PURPLE,SMALL,STRETCH,CHILD,True
balloon_13,PURPLE,SMALL,DIP,ADULT,True
balloon_14,PURPLE,SMALL,DIP,CHILD,False
balloon_15,PURPLE,SMALL,DIP,CHILD,False
balloon_16,PURPLE,LARGE,STRETCH,ADULT,True
balloon_17,PURPLE,LARGE,STRETCH,CHILD,True
balloon_18,PURPLE,LARGE,DIP,ADULT,True
balloon_19,PURPLE,LARGE,DIP,CHILD,False
balloon_20,PURPLE,LARGE,DIP,CHILD,False


In [23]:
# Filter rows where the act column equals "DIP".
balloons[balloons$act == "DIP", ]

Unnamed: 0,color,size,act,age,inflated
balloon_3,YELLOW,SMALL,DIP,ADULT,True
balloon_4,YELLOW,SMALL,DIP,CHILD,False
balloon_5,YELLOW,SMALL,DIP,CHILD,False
balloon_8,YELLOW,LARGE,DIP,ADULT,True
balloon_9,YELLOW,LARGE,DIP,CHILD,False
balloon_10,YELLOW,LARGE,DIP,CHILD,False
balloon_13,PURPLE,SMALL,DIP,ADULT,True
balloon_14,PURPLE,SMALL,DIP,CHILD,False
balloon_15,PURPLE,SMALL,DIP,CHILD,False
balloon_18,PURPLE,LARGE,DIP,ADULT,True


In [24]:
# Summarise every column of the data frame.
summary(balloons)

    color       size         act        age      inflated      
 PURPLE:10   LARGE:10   DIP    :12   ADULT: 8   Mode :logical  
 YELLOW:10   SMALL:10   STRETCH: 8   CHILD:12   FALSE:8        
                                                TRUE :12       
                                                NA's :0        

## Practice

#### Display the yellow balloons

In [27]:
# Keep only the rows whose color is "YELLOW".
balloons[balloons$color == "YELLOW", ]

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True
balloon_2,YELLOW,SMALL,STRETCH,CHILD,True
balloon_3,YELLOW,SMALL,DIP,ADULT,True
balloon_4,YELLOW,SMALL,DIP,CHILD,False
balloon_5,YELLOW,SMALL,DIP,CHILD,False
balloon_6,YELLOW,LARGE,STRETCH,ADULT,True
balloon_7,YELLOW,LARGE,STRETCH,CHILD,True
balloon_8,YELLOW,LARGE,DIP,ADULT,True
balloon_9,YELLOW,LARGE,DIP,CHILD,False
balloon_10,YELLOW,LARGE,DIP,CHILD,False


#### Display the inflated balloons

In [29]:
# `inflated` is a logical column (see the summary output), so it can
# index the rows directly. Comparing it with the string 'TRUE' only
# worked through implicit logical-to-character coercion.
balloons[balloons$inflated, ]

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True
balloon_2,YELLOW,SMALL,STRETCH,CHILD,True
balloon_3,YELLOW,SMALL,DIP,ADULT,True
balloon_6,YELLOW,LARGE,STRETCH,ADULT,True
balloon_7,YELLOW,LARGE,STRETCH,CHILD,True
balloon_8,YELLOW,LARGE,DIP,ADULT,True
balloon_11,PURPLE,SMALL,STRETCH,ADULT,True
balloon_12,PURPLE,SMALL,STRETCH,CHILD,True
balloon_13,PURPLE,SMALL,DIP,ADULT,True
balloon_16,PURPLE,LARGE,STRETCH,ADULT,True


#### Display the stretching balloons

In [30]:
# Keep only the rows whose act is "STRETCH".
balloons[balloons$act == "STRETCH", ]

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True
balloon_2,YELLOW,SMALL,STRETCH,CHILD,True
balloon_6,YELLOW,LARGE,STRETCH,ADULT,True
balloon_7,YELLOW,LARGE,STRETCH,CHILD,True
balloon_11,PURPLE,SMALL,STRETCH,ADULT,True
balloon_12,PURPLE,SMALL,STRETCH,CHILD,True
balloon_16,PURPLE,LARGE,STRETCH,ADULT,True
balloon_17,PURPLE,LARGE,STRETCH,CHILD,True


#### Display the large balloons

#### Display the yellow balloons

#### Display just the color and size columns

In [31]:
# All rows, restricted to the color and size columns.
balloons[, c("color", "size")]

Unnamed: 0,color,size
balloon_1,YELLOW,SMALL
balloon_2,YELLOW,SMALL
balloon_3,YELLOW,SMALL
balloon_4,YELLOW,SMALL
balloon_5,YELLOW,SMALL
balloon_6,YELLOW,LARGE
balloon_7,YELLOW,LARGE
balloon_8,YELLOW,LARGE
balloon_9,YELLOW,LARGE
balloon_10,YELLOW,LARGE


#### Display just the color and size columns for large, yellow balloons

In [32]:
# Row filter (large AND yellow) combined with a column selection.
# `&` is the element-wise AND needed for a row mask.
balloons[
  balloons$size == "LARGE" & balloons$color == "YELLOW",
  c("color", "size")
]

Unnamed: 0,color,size
balloon_6,YELLOW,LARGE
balloon_7,YELLOW,LARGE
balloon_8,YELLOW,LARGE
balloon_9,YELLOW,LARGE
balloon_10,YELLOW,LARGE
