-
Notifications
You must be signed in to change notification settings - Fork 1
/
HELP_univariate_SAScode.sas
290 lines (256 loc) · 6.72 KB
/
HELP_univariate_SAScode.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
* ==========================================;
* N736 Lesson 07 - by Melinda Higgins, PhD
* last updated 09/26/2018
*
* Univariate Stats
* working with the HELP dataset
* ==========================================;
* create a library - change DATADIR to the directory where you
* have downloaded the data set file for member HELPMKH
* (helpmkh.sas7bdat), which this program reads as L7.helpmkh;
%let datadir = C:\MyGithub\N736_lesson_Univariate;
LIBNAME L7 "&datadir";
* =========================================
* apply FORMATS
* =========================================;
* Helper macro that writes one VALUE statement per format name so
* that formats sharing the same labels are defined once instead of
* being copied. It only generates VALUE statements and therefore
* has to be called inside a PROC FORMAT step;
%macro shared_value_formats;
  %local i fmt_name yesno_names f1_letters;

  /* formats that map 0 to no and 1 to yes */
  %let yesno_names = HOMELESS G1B SATREAT DRINKSTATUS ANYSUBSTATUS LINKSTATUS;
  %do i = 1 %to %sysfunc(countw(&yesno_names, %str( )));
    %let fmt_name = %scan(&yesno_names, &i, %str( ));
    value &fmt_name
      0 = 'no'
      1 = 'yes' ;
  %end;

  /* formats F1A through F1T all carry the same four labels */
  %let f1_letters = ABCDEFGHIJKLMNOPQRST;
  %do i = 1 %to %length(&f1_letters);
    %let fmt_name = F1%substr(&f1_letters, &i, 1);
    value &fmt_name
      0 = 'Not at all or less than 1 day'
      1 = '1-2 days'
      2 = '3-4 days'
      3 = '5-7 days or nearly every day for 2 weeks' ;
  %end;
%mend shared_value_formats;

proc format library = WORK ;
  /* formats with labels that are unique to one variable */
  value TREAT
    0 = 'usual care'
    1 = 'HELP clinic' ;
  value FEMALE
    0 = 'Male'
    1 = 'Female' ;
  /* the no/yes formats and F1A-F1T */
  %shared_value_formats
run;
* NOTE(review): this step modifies WORK.helpmkh, but the only place
* this program reads helpmkh is from library L7 (it is copied to
* work.help after this step). Unless WORK.helpmkh already exists when
* the program starts, MODIFY has no data set to act on - confirm the
* intended target;
proc datasets library = WORK;
  modify helpmkh / correctencoding="WLATIN1";
  /* associate each variable with the user-defined format of the same name */
  format
    treat        TREAT.
    female       FEMALE.
    homeless     HOMELESS.
    g1b          G1B.
    f1a          F1A.
    f1b          F1B.
    f1c          F1C.
    f1d          F1D.
    f1e          F1E.
    f1f          F1F.
    f1g          F1G.
    f1h          F1H.
    f1i          F1I.
    f1j          F1J.
    f1k          F1K.
    f1l          F1L.
    f1m          F1M.
    f1n          F1N.
    f1o          F1O.
    f1p          F1P.
    f1q          F1Q.
    f1r          F1R.
    f1s          F1S.
    f1t          F1T.
    satreat      SATREAT.
    drinkstatus  DRINKSTATUS.
    anysubstatus ANYSUBSTATUS.
    linkstatus   LINKSTATUS.
  ;
quit;
* copy L7.helpmkh into the WORK library under the name HELP
* and then list the contents of the copy;
data work.help;
  set L7.helpmkh;
run;

proc contents data=help;
run;
* univariate statistics for age with the default percentile definition;
proc univariate data=help plots;
  var age;
run;

* OPTIONAL - the PCTLDEF= option selects the percentile algorithm
* (the default is PCTLDEF=5 and the valid values are 1 through 5 -
* see the SAS help for details). This run uses definition 1;
proc univariate data=help pctldef=1 plots;
  var age;
run;

* the same statistics again with definition 3;
proc univariate data=help pctldef=3 plots;
  var age;
run;
* univariate statistics for age plus a histogram with a normal
* curve overlaid - the NORMAL option on the HISTOGRAM statement
* also requests the fit of a normal distribution;
proc univariate data=help pctldef=1 plots;
  var age;
  histogram age / normal;
run;

* further probability plots for age - P-P plot, probability plot
* and Q-Q plot;
proc univariate data=help pctldef=1 plots;
  var age;
  ppplot   age;
  probplot age;
  qqplot   age;
run;
* boxplot of a single variable;
* PROC BOXPLOT plots an analysis variable against a group variable,
* so a constant variable x=1 is added to a copy of the data and used
* as the group - this yields exactly one box for age;
data help2;
  set help;
  x = 1;
run;

proc boxplot data=help2;
  plot age*x;
run;
* vertical boxplots of age, one box per racegrp category,
* drawn with the VBOX statement of PROC SGPLOT;
proc sgplot data=help;
  vbox age / category=racegrp;
run;

* same plot, with the percentile method set through PERCENTILE=;
proc sgplot data=help;
  vbox age / category=racegrp percentile=3;
run;
* other summaries - PROC MEANS for age over all observations;
proc means data=help;
  var age;
run;

* age summarised within each racegrp level;
proc means data=help;
  class racegrp;
  var age;
run;

* age summarised within each treat level;
proc means data=help;
  class treat;
  var age;
run;
* categorical data - frequency table and frequency plot of racegrp;
proc freq data=help;
  tables racegrp / plots=freqplot;
run;

* frequency table and plot of f1a with no FORMAT statement in the step;
proc freq data=help;
  tables f1a / plots=freqplot;
run;

* racegrp crossed with treat, with chi-square tests requested;
proc freq data=help;
  tables racegrp * treat / chisq;
run;

* f1a again, this time with the F1A format applied inside the step;
* SKIP;
proc freq data=help;
  format f1a F1A.;
  tables f1a / plots=freqplot;
run;