-
Notifications
You must be signed in to change notification settings - Fork 19
/
week1_inclass_exercise_script.R
137 lines (107 loc) · 5.7 KB
/
week1_inclass_exercise_script.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
## Warning: the code below contains deliberate errors.
## Also it sometimes contains "???" which you need to replace with appropriate text.
#######################################################
## first line of code is to clear R's memory
rm(list=ls())
#######################################################
#######################################################
## First we load some required add-on package
## (you need to install these if you haven't already)
library(readr)
library(dplyr)
library(ggplot2)
library(skimr)
#######################################################
#######################################################
## Now read in the data, using the read_csv() function.
## First we should assign, using the assignment arrow,
## the URL of the published version of the google sheet data into an object.
the_URL - "https://docs.google.com/spreadsheets/d/e/2PACX-1vQFgYX1QhF9-UXep22XmPow1ZK5nbFHix9nkQIa0DzqUhPtZRxH1HtY-hsno32zDiuIHiLb2Hvphk1L/pub?gid=1188775314&single=true&output=csv"
## then we use the read_csv function to read in the data from that URL
class_RTs <- read_cvs(the_URL)
#######################################################
## DO NOT USE read.csv above!!!
#######################################################
## Have a look at the data in R, does it look OK?
clas_RTs
#######################################################
#######################################################
## Now we need to do some data wrangling (cleaning and tidying)
## Clean up the column / variable names:
## Must be very careful to get the next line right!!! Really important!!!
## Otherwise columns will have the wrong names, which would be very confusing
names(class_RTs) <- c("Timestamp", "ID", "Gender", "Pref_Reaction_time_1"
"Verbal_memory_score", "Number_memory_score",
"Visual_memory_score",
"Weight_kgs", "Handed", "Nonpref_Reaction_time_ave",
"Pref_Reaction_time_2", "Pref_Reaction_time_3",
"Pref_Reaction_time_4", "Pref_Reaction_time_5",
"Pref_Reaction_time", "Random_number")
## check the variable names are what we just tried to set them to be
class_RTs
#######################################################
#######################################################
## Check the variable types are correct
## (they should be in this case, but checking is a good habit.)
## Timestamp should be a character
## ID should be a character
## Gender should be a character
## Handed should be character
## The remaining variables should be numeric (<dbl> if fractional, <int> if whole numbers).
class_RTs
#######################################################
#######################################################
## Correct or exclude problematic data
## If we have problems here, with variables of the wrong type,
## it probably means some of the data entry is a bit messed up.
## the skim() function is a really nice one for looking at the data,
## including if any variables have missing values (NAs)
skim(class_RT)
#######################################################
## and the number of observations of each gender
### Check numbers of data points in each gender
table(class_RTs$Gender)
#######################################################
#######################################################
## Now make a figure containing the histogram of reaction times
ggplot(data=class_RTs, aes(x=???)) + geom_histogram()
## Now make a figure containing two histograms histograms (i.e. two "facets"), one for each gender
ggplot(data=class_RTs, aes(x=???)) + geom_histogram() + facet_grid(~Gender)
## And a box and whisker plot
ggplot(data=class_RTs, aes(x=???, y=???)) + geom_boxplot()
## Or just the data points (with some jitter, to separate overlapping points):
ggplot(data=class_RTs, aes(x=???, y=???)) + geom_point() + geom_jitter(width=0.05)
#######################################################
#######################################################
## Do you think there is a difference in reaction times between females and males?
## What is the effect size (i.e. the magnitude of the difference?)
## Is this likely to be of practical significance?
## Look at your graphs and assess assumptions:
## - Do you think the residuals will be normally distributed?
## - Do the two groups have similar variance?
## - Do there seem to be any outliers?
## - Are data points independent? (You don't get this from the graph, but rather from knowing how the data were collected.)
#######################################################
#######################################################
## Do the t test and assign the outcome to an object:
my_ttest <- t.test(??? ~ ???, data=class_RTs, var.equal=TRUE)
## look at the result of the t-test
t.test
#######################################################
#######################################################
## Critical thinking
# How might the work be flawed?
# How might the analysis be flawed (assumptions violated)?
# Is the difference (i.e. effect size) small, medium, large, relative to differences caused by other factors?
# How general might be the finding?
# How do the qualitative and quantitative findings compare to those in previous studies?
# What could have been done better?
# What are the implications of the findings?
#######################################################
#######################################################
## Report and communicate the results
## Write a sentence that gives the direction and extent of difference,
## and a measure of certainty / uncertainty in that finding.
## Make a beautiful graph that very clearly communicates the findings!
ggplot(data=???, aes(x=???, y=???)) + geom_boxplot() +
ylab("Reaction time (seconds)")