-
Notifications
You must be signed in to change notification settings - Fork 1
/
lesson19_Rcode.R
100 lines (79 loc) · 2.57 KB
/
lesson19_Rcode.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# ==================================
# Lesson 19 - logistic regression
#
# Melinda Higgins, PhD
# dated 10/31/2017
# ==================================
# ==================================
# we'll be working with the
# helpmkh dataset
# ==================================
library(tidyverse)
library(haven)
# read the SPSS file; the path is relative, so it is looked up
# in the current working directory
helpdat <- haven::read_spss(file = "helpmkh.sav")
# ============================================.
# For this lesson we'll use the helpmkh dataset
#
# Let's focus on homeless as the main outcome variable
# which is dichotomous coded 0 and 1. We'll use
# logistic regression to look at predicting whether someone
# was homeless or not using these variables
# age, female, pss_fr, pcs, mcs, cesd and indtot
# ============================================.
# keep only the outcome (homeless) and the seven candidate
# predictors in a smaller working dataset
h1 <- dplyr::select(
  helpdat,
  homeless, age, female, pss_fr,
  pcs, mcs, cesd, indtot
)
# ============================================.
# let's look at the correlations between these variables
# ============================================;
# look at the correlation matrix
library(psych)
# Pearson correlations among all of the columns kept in h1
psych::corr.test(
  x = h1,
  method = "pearson"
)
# ============================================.
# Given the stronger correlation between indtot
# and homeless, let's run a t-test to see the comparison
# ============================================;
# Bartlett Test of Homogeneity of Variances
bartlett.test(indtot ~ homeless, data = h1)
# t-tests of indtot by homeless group:
# first without assuming equal variances (the t.test default),
t.test(indtot ~ homeless, data = h1)
# then assuming equal variances in the two groups
t.test(indtot ~ homeless, data = h1, var.equal = TRUE)
# ============================================.
# Let's run a logistic regression of indtot to predict
# the probability of being homeless
# we'll also SAVE the predicted probabilities
# and the predicted group membership
#
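# NOTE: pprob, ctable and plots=roc named below are not used
# anywhere in this R script - TODO confirm where they come from
# let's look at different probability thresholds (pprob)
# ctable gives us the classification table
#
# the ROC curve itself is made with the ROCR package further below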
# ============================================;
# fit the logistic regression: homeless (0/1) predicted by indtot
m1 <- glm(
  formula = homeless ~ indtot,
  family = binomial,
  data = h1
)
# show the fitted model, then the full summary table
m1
summary(m1)
# coefficients on the log-odds scale ...
coef(m1)
# ... and exponentiated, which gives odds ratios
exp(coef(m1))
# predicted probability of being homeless for every row of h1
m1.predict <- predict(
  m1,
  newdata = h1,
  type = "response"
)
# predicted probability plotted against the predictor
plot(x = h1$indtot, y = m1.predict)
# NOTE(review): nothing else in this script calls an Rcmdr function
# by name; attaching the package here may open the R Commander
# GUI - confirm it is still wanted for the lesson
library(Rcmdr)
#debug(utils:::unpackPkgZip)
#install.packages("rattle")
# install.packages("RGtk2")
#library(RGtk2)
#library(rattle)
#rattle()
# see https://www.r-bloggers.com/how-to-perform-a-logistic-regression-in-r/
# make an ROC curve
library(ROCR)
# predicted probabilities from m1 for every row of h1
p <- predict(m1, type = "response", newdata = h1)
# pair the predicted probabilities with the observed outcome
pr <- prediction(
  predictions = p,
  labels = as.numeric(h1$homeless)
)
# ROC curve: true positive rate against false positive rate
prf <- performance(pr, x.measure = "fpr", measure = "tpr")
plot(prf)
# area under the ROC curve, pulled out of the performance
# object's y.values slot as a plain number
auc <- performance(pr, measure = "auc")@y.values[[1]]
auc