update readme

kmoir · Apr 24, 2015 · cfe140e · cfe140e
1 parent 7e838a2
commit cfe140e
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -1 +1,20 @@
 # getdata-013
+
+## Assignment instructions
+
+You should create one R script called run_analysis.R that does the following. 
+
+* Merges the training and the test sets to create one data set.
+* Extracts only the measurements on the mean and standard deviation for each measurement. 
+* Uses descriptive activity names to name the activities in the data set.
+* Appropriately labels the data set with descriptive variable names. 
+* From the labelled data set produced by the previous step (step 4 of the assignment), creates a second, independent tidy data set with the average of each variable for each activity and each subject.
+
+
+## To run the code
+
+* Download <https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip> and unzip it into a directory.
+* Set the R working directory to the root of the unzipped `UCI HAR Dataset` directory (the one containing `features.txt`), e.g. `setwd("/Users/kmoir/coursera/UCI HAR Dataset")`.
+* In R, run `source("run_analysis.R")`.
+* The script writes its result to `all_tidy_data.txt` in the working directory.
+
diff --git a/run_analysis.R b/run_analysis.R
@@ -1,8 +1,6 @@
 features = read.table("features.txt")
 features= features[,2]
-features
 our_features <- grepl("mean|std", features)
-our_features
 
 activity_labels = read.table("activity_labels.txt")
 activity_labels = activity_labels[,2]
@@ -20,13 +18,10 @@ names(train_subject) = c("subjectid")
 
 names(train_data) = features
 train_data = train_data[,our_features]
-head(train_data)
 
 train_labels[,2] = activity_labels[train_labels[,1]]
-head(train_labels)
 
 names(train_labels) = c("activityid", "activityname")
-tail(train_labels, n=5)
 
 #merge three training data frames together
 all_train_data <- cbind(train_subject, train_data, train_labels)
@@ -43,34 +38,30 @@ names(test_subject) = "subjectid"
 
 names(test_data) = features
 test_data = test_data[,our_features]
-head(test_data)
 
 test_labels[,2] = activity_labels[test_labels[,1]]
 names(test_labels) = c("activityid", "activityname")
-tail(test_labels, n=5)
 
 #merge three test data frames together
 all_test_data <- cbind(test_subject, test_data, test_labels)
 
 all_data <- rbind(all_train_data, all_test_data)
 
-library(plyr)
 all_tidy_data <- aggregate(. ~subjectid + activityname, all_data, mean)
 #make names lowercase
 names(all_tidy_data) = tolower(names(all_tidy_data))
-#clean up variable names so they are more readable
+#clean up heading names so they are more readable
 names(all_tidy_data) = sub("bodybody","body",names(all_tidy_data))
 names(all_tidy_data) = sub("acc","accelerometer",names(all_tidy_data))
 names(all_tidy_data) = sub("gyro","gyroscope",names(all_tidy_data))
 names(all_tidy_data) = sub("^f","frequency",names(all_tidy_data))
 names(all_tidy_data) = sub("^t","time",names(all_tidy_data))
 
+#clean up the activityname column: remove "_" and convert the activity names to lowercase
 all_tidy_data[,2] = sub("_","",all_tidy_data[,2])
 all_tidy_data[,2] = tolower(all_tidy_data[,2])
 
-#should mean results be in a new column?
 all_tidy_data <- all_tidy_data[order(all_tidy_data$subjectid,all_tidy_data$activityname),]
 
 write.table(all_tidy_data, file="all_tidy_data.txt", row.name=FALSE)
-head(all_tidy_data, n=1)