# Data Frames in R

#### A data frame is a two-dimensional object that stores a dataset in tabular form.
#### Each column represents a variable, and each row holds one set of values for those variables.
#### The column names can't be empty.
#### All the values in a particular column have the same data type.


In [13]:
# Build a five-row data frame of students. Each argument becomes one
# column, and every column vector supplies one value per student.
studentdf <- data.frame(
  roll_no = c(1, 2, 3, 4, 5),
  name = c("A", "B", "C", "D", "E"),
  age = c(18, 17, 16, 15, 17)
)

# Display the data frame
studentdf

roll_no,name,age
<dbl>,<chr>,<dbl>
1,A,18
2,B,17
3,C,16
4,D,15
5,E,17


### Accessing the column

In [14]:
# First method: the `$` operator followed by the literal column name
# extracts that column as a vector
studentdf$name

In [15]:
# Second method (column name): `[[` with the column name given as a
# string extracts that column as a vector
studentdf[["name"]]

In [16]:
# Third method (column index): `[[` with the column's position;
# `name` is the 2nd column of studentdf
studentdf[[2]]


### Changing column name

In [17]:
# Rename the column "roll_no" to "old_roll_no": find the matching entry
# in the vector of column names and overwrite only that entry.
names(studentdf)[names(studentdf) == "roll_no"] <- "old_roll_no"

# Display the data frame to confirm the new column name
studentdf

old_roll_no,name,age
<dbl>,<chr>,<dbl>
1,A,18
2,B,17
3,C,16
4,D,15
5,E,17


### Adding column to dataframe

In [18]:
# Add a new column called `marks`, supplying one value per existing row
studentdf$marks <- c(90, 89, 80, 91, 70)

# Display the data frame with the new column
studentdf

old_roll_no,name,age,marks
<dbl>,<chr>,<dbl>,<dbl>
1,A,18,90
2,B,17,89
3,C,16,80
4,D,15,91
5,E,17,70


### Adding row to dataframe

In [19]:
# Build a one-row data frame with the same column names as studentdf,
# then append it with rbind(). The combined result is stored under a new
# name; studentdf itself is not reassigned.
studentdf2 <- data.frame(
  old_roll_no = 6,
  name = "F",
  age = 18,
  marks = 88
)
newstudentdf <- rbind(studentdf, studentdf2)


In [20]:
# Display the data frame that now includes the appended row
newstudentdf

old_roll_no,name,age,marks
<dbl>,<chr>,<dbl>,<dbl>
1,A,18,90
2,B,17,89
3,C,16,80
4,D,15,91
5,E,17,70
6,F,18,88


### Extracting multiple columns from Data Frame

In [23]:
# Build a new data frame from three columns of newstudentdf.
# The columns are passed without names, so data.frame() derives the new
# column names from the expressions themselves (e.g. newstudentdf.name).
newdf <- data.frame(
  newstudentdf$name,
  newstudentdf$old_roll_no,
  newstudentdf$marks
)

newdf


newstudentdf.name,newstudentdf.old_roll_no,newstudentdf.marks
<chr>,<dbl>,<dbl>
A,1,90
B,2,89
C,3,80
D,4,91
E,5,70
F,6,88


### Extracting specific rows & all columns from dataframe

In [24]:
# Rows 1 through 4 with every column (the column index is left empty)
newdf2 <- newstudentdf[1:4, ]
newdf2

Unnamed: 0_level_0,old_roll_no,name,age,marks
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<dbl>
1,1,A,18,90
2,2,B,17,89
3,3,C,16,80
4,4,D,15,91


In [27]:
# Every row (the row index is left empty) of columns 3 and 4,
# which are `age` and `marks`
newdf3 <- newstudentdf[, 3:4]
newdf3

age,marks
<dbl>,<dbl>
18,90
17,89
16,80
15,91
17,70
18,88


In [28]:
# Rows 2 and 3 of columns 2 and 3 (`name` and `age`)
newdf4 <- newstudentdf[2:3, 2:3]
newdf4

Unnamed: 0_level_0,name,age
Unnamed: 0_level_1,<chr>,<dbl>
2,B,17
3,C,16


### Dropping columns from Dataset

In [29]:
# Drop the `age` column by name: a minus sign in `select` excludes it
newdf5 <- subset(newstudentdf, select = -age)
newdf5

Unnamed: 0_level_0,old_roll_no,name,marks
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>
1,1,A,90
2,2,B,89
3,3,C,80
4,4,D,91
5,5,E,70
6,6,F,88


In [30]:
# Drop a column by position: the negative index excludes column 3 (`age`)
newdf6 <- newstudentdf[-3]
newdf6

old_roll_no,name,marks
<dbl>,<chr>,<dbl>
1,A,90
2,B,89
3,C,80
4,D,91
5,E,70
6,F,88


### Dropping rows from Data Frame


In [39]:
# Drop rows by condition: keep only the rows whose name is not "A"
newdf7 <- newstudentdf[newstudentdf$name != "A", ]
newdf7

Unnamed: 0_level_0,old_roll_no,name,age,marks
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<dbl>
2,2,B,17,89
3,3,C,16,80
4,4,D,15,91
5,5,E,17,70
6,6,F,18,88


In [43]:
# Drop rows by position: the negative indices exclude rows 2 and 5
newdf8 <- newstudentdf[-c(2, 5), ]
newdf8

Unnamed: 0_level_0,old_roll_no,name,age,marks
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<dbl>
1,1,A,18,90
3,3,C,16,80
4,4,D,15,91
6,6,F,18,88


### Sort a data frame

In [44]:
# Sort the rows by `age` in ascending order. order() returns the row
# positions in sorted order, and those positions are used as the row index.
newstudentdf[order(newstudentdf$age), ]

Unnamed: 0_level_0,old_roll_no,name,age,marks
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<dbl>
4,4,D,15,91
3,3,C,16,80
2,2,B,17,89
5,5,E,17,70
1,1,A,18,90
6,6,F,18,88


In [45]:
# Sort the rows by `age` in descending order: negating the numeric column
# before calling order() reverses the sort direction.
newstudentdf[order(-newstudentdf$age), ]

Unnamed: 0_level_0,old_roll_no,name,age,marks
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<dbl>
1,1,A,18,90
6,6,F,18,88
2,2,B,17,89
5,5,E,17,70
3,3,C,16,80
4,4,D,15,91
