In [None]:
# 1. Load dplyr
#    Install the package first when it is not available, then attach it.
#    library() is called unconditionally so that dplyr is attached even on
#    the run that had to install it, and so that a failed install stops here.
if (!requireNamespace("dplyr", quietly = TRUE)) {
    message("Installing dplyr")
    install.packages("dplyr")
}
library("dplyr")

In [None]:
# 2. Create project data directory
#    dir.exists() is used rather than file.exists() so that a regular file
#    that happens to be named "Data" is not mistaken for the directory.
if (!dir.exists("./Data")) {
    message("Creating data directory")
    dir.create("./Data")
}

In [None]:
# 4. Extract Human Activity Recognition dataset
#    The archive is only unpacked when the extracted folder is not present yet.
#    This cell does not download the archive; it must already be at
#    ./Data/UCI_HAR_Dataset.zip.
if (!file.exists("./Data/UCI HAR Dataset")) {
    # Fail with a clear message instead of letting unzip() emit a warning and
    # the later read.table() calls fail on missing files.
    if (!file.exists("./Data/UCI_HAR_Dataset.zip")) {
        stop("Dataset archive not found: ./Data/UCI_HAR_Dataset.zip",
             call. = FALSE)
    }
    message("Extracting dataset")
    unzip("./Data/UCI_HAR_Dataset.zip",
          overwrite = FALSE,
          exdir = "./Data")
}

In [None]:
# 5. Load the features
#   5.1 Identify the mean() and std() features
#   5.2 Create syntactically valid variable names from the features 
#       to be used as column names for the training and test datasets
# The feature names are read as plain character strings so that the column
# type does not depend on the R version's stringsAsFactors default.
features <- tbl_df(
    read.table("./Data/UCI HAR Dataset/features.txt",
               col.names = c("Id", "Feature"),
               stringsAsFactors = FALSE))

# The columns are referenced by name inside mutate() (not via features$...),
# so each step works on the table flowing through the pipe.
features <- features %>%
    # Flag the features whose original name contains "mean()" or "std()".
    mutate(Is.Mean = grepl("mean\\(\\)", Feature),
           Is.Std = grepl("std\\(\\)", Feature),
           Feature.Variable = make.names(Feature, unique = TRUE)) %>%
    # Expand the leading / embedded "t" and "f" prefixes.
    mutate(Feature.Variable = gsub("^t", "Time.", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("\\.t", ".Time.", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("^f", "Frequency.", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("\\.f", ".Frequency.", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("^angle\\.", "Angle.", Feature.Variable)) %>%
    # Remove the doubled "Body" and separate the name components with dots.
    mutate(Feature.Variable = gsub("BodyBody", "Body", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("Acc", ".Acc", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("Gyro", ".Gyro", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("Jerk", ".Jerk", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("Mag", ".Mag", Feature.Variable)) %>%
    # Collapse every run of dots into a single dot in one pass, then drop a
    # trailing dot.
    mutate(Feature.Variable = gsub("\\.+", ".", Feature.Variable)) %>%
    mutate(Feature.Variable = gsub("\\.$", "", Feature.Variable)) %>%
    # Upper-case the first letter of the name and of every dot-separated part.
    mutate(Feature.Variable = gsub("(^|[\\.])([[:alpha:]])", "\\1\\U\\2",
                                   Feature.Variable, perl = TRUE))

In [None]:
# Display the features table, including the derived Is.Mean, Is.Std and
# Feature.Variable columns.
features

In [None]:
# 6. Load activities
#    Two columns are read from activity_labels.txt and named Id and Activity.
activities <- "./Data/UCI HAR Dataset/activity_labels.txt" %>%
    read.table(col.names = c("Id", "Activity")) %>%
    tbl_df()

In [None]:
# 7. Load the training dataset
#   7.1 Name the measurement columns after features$Feature.Variable
#   7.2 Put the subject ids, the activity ids and a constant partition label
#       in front of the measurement columns
train <- "./Data/UCI HAR Dataset/train/X_train.txt" %>%
    read.table() %>%
    tbl_df() %>%
    setNames(features$Feature.Variable)
train <- cbind(
    "./Data/UCI HAR Dataset/train/subject_train.txt" %>%
        read.table() %>%
        tbl_df() %>%
        rename(Subject.Id = V1),
    "./Data/UCI HAR Dataset/train/y_train.txt" %>%
        read.table() %>%
        tbl_df() %>%
        rename(Activity.Id = V1),
    Dataset.Partition = c("Training"),
    train)

In [None]:
# Preview the first rows of the assembled training data.
head(train)

In [None]:
# 8. Load the test dataset
#   8.1 Name the measurement columns after features$Feature.Variable
#   8.2 Put the subject ids, the activity ids and a constant partition label
#       in front of the measurement columns
test <- "./Data/UCI HAR Dataset/test/X_test.txt" %>%
    read.table() %>%
    tbl_df() %>%
    setNames(features$Feature.Variable)
test <- cbind(
    "./Data/UCI HAR Dataset/test/subject_test.txt" %>%
        read.table() %>%
        tbl_df() %>%
        rename(Subject.Id = V1),
    "./Data/UCI HAR Dataset/test/y_test.txt" %>%
        read.table() %>%
        tbl_df() %>%
        rename(Activity.Id = V1),
    Dataset.Partition = c("Test"),
    test)

In [None]:
# Preview the first rows of the assembled test data.
head(test)

In [None]:
# Stack the training rows on top of the test rows and preview the result.
merged <- rbind(train, test)
head(merged)

In [None]:
# Combine the training and test rows, join the activity names onto them, and
# keep only the subject, the activity name and the mean()/std() feature
# columns; then preview the result.
merged <- rbind(train, test) %>%
    left_join(activities, by = c("Activity.Id" = "Id")) %>%
    select(Subject.Id, Activity,
           one_of(
               filter(features, Is.Mean == TRUE | Is.Std == TRUE) %>%
                   select(Feature.Variable) %>% .[["Feature.Variable"]]))
head(merged)

In [None]:
# 9. Merge the training and test datasets
#   9.1 Attach the activity name that belongs to each Activity.Id
#   9.2 Keep the subject, the activity name and the features flagged as
#       mean or std in the features table
#   9.3 Group the rows by subject and activity
merged <- left_join(rbind(train, test), activities,
                    by = c("Activity.Id" = "Id"))
merged <- select(
    merged, Subject.Id, Activity,
    one_of(features$Feature.Variable[features$Is.Mean | features$Is.Std]))
merged <- group_by(merged, Subject.Id, Activity)

In [None]:
# 10. Create a tidy summary of feature means grouped by subject and activity
#     `merged` is already grouped, so mean() is applied to every non-grouping
#     column within each (Subject.Id, Activity) group.
#     summarise_all() replaces the deprecated summarise_each(funs(...)) form.
tidy_summary <- summarise_all(merged, mean)
tidy_summary

In [None]:
# 11. Save the tidy summary as tidy_summary.txt, leaving out the row names
write.table(x = tidy_summary,
            file = "tidy_summary.txt",
            row.names = FALSE)

In [1]:
# Name of the dataset folder used as the root for file_path().
data_dir <- "UCI HAR Dataset"

# Build a path below `data_dir`.
#   ...: path components (character) appended after `data_dir`.
# Returns the components joined with "/" as a character vector.
file_path <- function(...) {
    file.path(data_dir, ...)
}

In [2]:
# Load the data set
# Read both partitions, then stack them with the training rows first.
training_set <- read.table(file = "./train/X_train.txt")
test_set <- read.table(file = "./test/X_test.txt")
data_set1 <- do.call(rbind, list(training_set, test_set))

In [3]:
# Show the dimensions (rows, columns) of the combined data.
dim(data_set1)

In [4]:
# Drop the two partition tables, which are no longer needed once combined
rm(list = c("test_set", "training_set"))

In [8]:
#2. Extracts only the measurements on the mean and standard deviation for each
#measurement.
# The columns are named after the second column of features.txt; only the
# columns whose name contains "-mean(" or "-std(" are kept.
features_name <- read.table(file = "./features.txt")[[2]]
names(data_set1) <- features_name
selected_measures <- grepl("-(mean|std)\\(", features_name)
data_set <- data_set1[, selected_measures, drop = FALSE]

In [9]:
# Show the dimensions (rows, columns) of the selected measurements.
dim(data_set)

In [10]:
#4. Appropriately labels the data set with descriptive variable names.
# Each (pattern, replacement) pair is applied to the column names in the
# order listed; every step works on the result of the previous one.
colnames(data_set) <- Reduce(
    function(current_names, rule) gsub(rule[1], rule[2], current_names),
    list(c("mean", "Mean."),
         c("std", "Std."),
         c("^t", "Time."),
         c("^f", "Frequency."),
         c("\\(\\)", ""),
         c("-", ""),
         c("BodyBody", "Body."),
         c("^", "MeanOf.")),
    colnames(data_set))
# Show the new names and a small corner of the data.
colnames(data_set)
data_set[seq_len(4), seq_len(5)]

MeanOfTimeBodyAccMeanX,MeanOfTimeBodyAccMeanY,MeanOfTimeBodyAccMeanZ,MeanOfTimeBodyAccStdX,MeanOfTimeBodyAccStdY
0.2885845,-0.02029417,-0.1329051,-0.9952786,-0.9831106
0.2784188,-0.01641057,-0.1235202,-0.9982453,-0.9753002
0.2796531,-0.01946716,-0.1134617,-0.9953796,-0.967187
0.2791739,-0.02620065,-0.1232826,-0.9960915,-0.9834027


In [None]:
#3. Uses descriptive activity names to name the activities in the data set
# Read the activity codes of both partitions, training rows first.
activities_train <- read.table(file = "train/y_train.txt")
activities_test <- read.table(file = "test/y_test.txt")
# The names are positioned so that activity code k selects the k-th name.
labels <- c("WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS",
            "SITTING", "STANDING", "LAYING")
activities <- labels[c(activities_train[[1]], activities_test[[1]])]
# Put the activity name in front of the measurement columns.
data_set <- cbind(Activity = activities, data_set)
data_set[seq_len(4), seq_len(5)]

In [None]:
#5. From the data set in step 4, creates a second, independent tidy data set 
#with the average of each variable for each activity and each subject.
# Read the subject ids of both partitions, training rows first, and put them
# in front of the existing columns.
subjects_train <- read.table(file = "train/subject_train.txt")
subjects_test <- read.table(file = "test/subject_test.txt")
subjects <- c(subjects_train[[1]], subjects_test[[1]])
data_set <- cbind(Subject = subjects, data_set)
data_set[seq_len(4), seq_len(5)]

In [None]:
# Average every measurement column within each (Subject, Activity) group.
# summarise_all() replaces the deprecated summarise_each(funs(...)) form.
library("dplyr")
average_data_set <- data_set %>%
    group_by(Subject, Activity) %>%
    summarise_all(mean)

In [None]:
# Display the table of per-subject, per-activity averages.
average_data_set

In [None]:
# Show the dimensions (rows, columns) of the averaged table.
dim(average_data_set)

In [None]:
# Write the averaged table to tidy_data_set.txt without row names.
# The argument is spelled out as `row.names` instead of relying on partial
# argument matching of `row.name`.
write.table(average_data_set, file = "tidy_data_set.txt", row.names = FALSE)