Getting and Cleaning Data course project. This repo was created to complete the final project of the Coursera course "Getting and Cleaning Data". The Codebook.md file describes the variables, the data, and the transformations performed to generate the final "tidy_data.csv" file, which contains the output of the attached R script.
The run_analysis.R script performs the following steps:
- Merges the training and the test sets to create one data set.
- Extracts only the measurements on the mean and standard deviation for each measurement.
- Uses descriptive activity names to name the activities in the data set.
- Appropriately labels the data set with descriptive variable names.
- From the data set in step 4, creates a second, independent tidy data set with the average of each variable for each activity and each subject.
library(plyr)
library(dplyr)
library(utils)
library(tibble)
# ---- Measurements: load, merge, keep mean/std features ----

# Feature table: the second column supplies the column names that are applied
# to the measurement data below.
features <- read.table(
  file = "./UCI HAR Dataset/features.txt",
  header = FALSE,
  col.names = c("index", "feature_name")
)
X_train <- read.table(
  file = "./UCI HAR Dataset/train/X_train.txt",
  header = FALSE
)
X_test <- read.table(
  file = "./UCI HAR Dataset/test/X_test.txt",
  header = FALSE
)

# Fail early if the feature list does not line up with the measurement
# columns; a short name vector would otherwise silently leave NA names.
stopifnot(
  ncol(X_train) == nrow(features),
  ncol(X_test) == nrow(features)
)
names(X_train) <- features$feature_name
names(X_test) <- features$feature_name

# Training rows first, test rows second; the label and subject vectors are
# stacked in the same order further down.
X_all <- rbind(X_train, X_test)

# Keep every column whose name contains "mean" or "std". Plain positional
# subsetting is used instead of dplyr::select(): select() treats a bare
# external integer vector ambiguously and, in some dplyr versions, refuses
# data frames whose column names are not unique (possible if features.txt
# repeats a name). drop = FALSE keeps a data frame even for a single match.
X_all_meanstd <- X_all[, grep("mean|std", names(X_all)), drop = FALSE]
# ---- Activity labels: attach a descriptive activity name to every row ----

# Lookup table mapping a numeric activity code to its descriptive label.
activity <- read.table(
  file = "./UCI HAR Dataset/activity_labels.txt",
  header = FALSE,
  col.names = c("act_code", "activity")
)
y_train <- read.table(file = "./UCI HAR Dataset/train/y_train.txt")
y_test <- read.table(file = "./UCI HAR Dataset/test/y_test.txt")

# Stack in the same order as the measurements (train, then test).
y_all <- rbind(y_train, y_test)

# Translate each code through the act_code column rather than using the code
# as a row position, so the result stays correct even if the label file is
# not sorted or its codes do not start at 1.
y_all <- mutate(
  y_all,
  activity = activity$activity[match(y_all[[1]], activity$act_code)]
)

# A code with no entry in the label file would otherwise become a silent NA
# group in the final summary.
stopifnot(!anyNA(y_all$activity))

X_all_meanstd <- mutate(X_all_meanstd, activity = y_all$activity)
# ---- Subjects: attach the subject identifier to every row ----

# Each file is read into a single column named "subject".
subject_train <- read.table(
  file = "./UCI HAR Dataset/train/subject_train.txt",
  header = FALSE,
  col.names = "subject"
)
subject_test <- read.table(
  file = "./UCI HAR Dataset/test/subject_test.txt",
  header = FALSE,
  col.names = "subject"
)

# Train rows first, then test rows, matching the order of the measurements.
subject_all <- rbind(subject_train, subject_test)

# Append the subject column to the mean/std measurement table.
X_all_meanstd <- X_all_meanstd %>%
  mutate(subject = subject_all$subject)
# ---- Tidy summary: average of every variable per activity and subject ----

all_df <- as_tibble(X_all_meanstd)
all_df <- group_by(all_df, activity, subject)

# One row per (activity, subject) pair holding the mean of every remaining
# column. across() replaces summarise_each(), which has been deprecated since
# dplyr 0.7.0; within a grouped summarise, everything() excludes the grouping
# columns, and a single function keeps the original column names.
# .groups = "drop" returns an ungrouped tibble.
tidy_data <- summarise(
  all_df,
  across(everything(), mean),
  .groups = "drop"
)

# Written with write.table() defaults (space separated, header and row names
# included).
write.table(tidy_data, file = "tidy_data.txt")