-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
59 lines (47 loc) · 1.96 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
### Merge the training and the test sets to create one data set
## Root folder of the dataset, relative to the current working directory
dataset_dir <- "UCI HAR Dataset"

## Read one partition of the dataset and assemble it into a single data frame.
##   split:   name of the partition folder, "train" or "test"
##   returns: data frame with the subject column first, the activity column
##            second, followed by every column of X_<split>.txt
read_split <- function(split) {
  # Builds e.g. "UCI HAR Dataset/train/X_train.txt" for prefix "X"
  split_file <- function(prefix) {
    file.path(dataset_dir, split, paste0(prefix, "_", split, ".txt"))
  }
  subject <- read.table(split_file("subject")) # subject
  activity <- read.table(split_file("y")) # activity
  measures <- read.table(split_file("X")) # other variables
  # cbind() pairs the three files row by row and may recycle a shorter one
  # when the row counts divide evenly, so insist on equal lengths up front.
  stopifnot(
    nrow(subject) == nrow(measures),
    nrow(activity) == nrow(measures)
  )
  # The intermediates are local to this call, so they are released on return
  # without an explicit remove().
  cbind(subject, activity, measures)
}

## Train + Test (train rows first, then test rows)
# Named `table` because the later steps of this script refer to it by that name.
table <- rbind(read_split("train"), read_split("test"))

## Add header
features <- read.table(
  file.path(dataset_dir, "features.txt"),
  stringsAsFactors = FALSE
)
# Second column of features.txt holds the names; prepend the two id columns
features <- c("subject", "activity", features[["V2"]])
# A header of the wrong length would leave NA column names or fail obscurely
stopifnot(length(features) == ncol(table))
names(table) <- features
### Extract only the measurements on the mean and standard deviation for each measurement
## Flag every name in `features` that contains "mean" or "std".
## The match is case-sensitive and unanchored, so any name that merely contains
## one of those substrings is kept as well.
is_mean_or_std <- grepl("mean|std", features)
## Keep the two id columns first, then the flagged measurements in file order
keep <- c("subject", "activity", features[is_mean_or_std])
table <- table[, keep]
### Use descriptive activity names to name the activities in the data set
## Read activity names (V1 = activity id, V2 = activity label)
activ <- read.table("UCI HAR Dataset/activity_labels.txt")
## Replace
# One vectorized lookup: match() finds the row of `activ` for every id at once,
# so no per-row function call or intermediate list column is needed.
# Ids missing from the lookup table become NA.
table$activity <- as.character(activ$V2[match(table$activity, activ$V1)])
### Appropriately label the dataset
## Strip every literal "()" pair, then every "-", then lower-case the result.
## Both substitutions use fixed = TRUE, so the patterns are plain text, not regex.
names(table) <- tolower(
  gsub(
    "-", "",
    gsub("()", "", names(table), fixed = TRUE),
    fixed = TRUE
  )
)
### Average of each variable for each activity and each subject
library(dplyr)
## One row per (subject, activity) pair holding the mean of every other column.
## across(everything(), mean) replaces the superseded summarise_all(mean);
## .groups = "drop" returns an ungrouped result instead of one still grouped
## by subject. The result is not called `summary`, to avoid masking base::summary.
tidy_means <- table %>%
  group_by(subject, activity) %>%
  summarise(across(everything(), mean), .groups = "drop")
## Write the averages to summary.txt without row names
write.table(tidy_means, "summary.txt", row.names = FALSE)