-
-
Notifications
You must be signed in to change notification settings - Fork 17
/
mp_ds2cards.sas
291 lines (262 loc) · 8.36 KB
/
mp_ds2cards.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
/**
@file
@brief Create a CARDS file from a SAS dataset.
@details Uses dataset attributes to convert all data into datalines.
Running the generated file will rebuild the original dataset. Includes
support for large decimals, binary data, PROCESSED_DTTM columns, and
alternative encoding. If the input dataset is empty, the cards file will
still be created.
Additional support to generate a random sample and max rows.
Usage:
%mp_ds2cards(sashelp.class
, tgt_ds=work.class
, cards_file= "C:\temp\class.sas"
, showlog=NO
, maxobs=5
)
TODO:
- labelling the dataset
- explicity setting a unix LF
- constraints / indexes etc
@param [in] base_ds Should be two level - eg work.blah. This is the table
that is converted to a cards file.
@param [in] tgt_ds= Table that the generated cards file would create.
Optional - if omitted, will be same as BASE_DS.
@param [out] cards_file= ("%sysfunc(pathname(work))/cardgen.sas") Location in
which to write the (.sas) cards file
@param [in] maxobs= (max) To limit output to the first <code>maxobs</code>
observations, enter an integer here.
@param [in] random_sample= (NO) Set to YES to generate a random sample of
data. Can be quite slow.
@param [in] showlog= (YES) Whether to show generated cards file in the SAS
log. Valid values:
@li YES
@li NO
@param [in] outencoding= Provide encoding value for file statement (eg utf-8)
@param [in] append= (NO) If NO then will rebuild the cards file if it already
exists, otherwise will append to it. Used by the mp_lib2cards.sas macro.
<h4> Related Macros </h4>
@li mp_lib2cards.sas
@li mp_ds2inserts.sas
@li mp_mdtablewrite.sas
@version 9.2
@author Allan Bowe
@cond
**/
%macro mp_ds2cards(base_ds, tgt_ds=
,cards_file="%sysfunc(pathname(work))/cardgen.sas"
,maxobs=max
,random_sample=NO
,showlog=YES
,outencoding=
,append=NO
)/*/STORE SOURCE*/;
%local i setds nvars;
%if not %sysfunc(exist(&base_ds)) %then %do;
%put %str(WARN)ING: &base_ds does not exist;
%return;
%end;
%if %index(&base_ds,.)=0 %then %let base_ds=WORK.&base_ds;
%if (&tgt_ds = ) %then %let tgt_ds=&base_ds;
%if %index(&tgt_ds,.)=0 %then %let tgt_ds=WORK.%scan(&base_ds,2,.);
%if ("&outencoding" ne "") %then %let outencoding=encoding="&outencoding";
%if ("&append" = "" or "&append" = "NO") %then %let append=;
%else %let append=mod;
/* get varcount */
%let nvars=0;
proc sql noprint;
select count(*) into: nvars from dictionary.columns
where upcase(libname)="%scan(%upcase(&base_ds),1)"
and upcase(memname)="%scan(%upcase(&base_ds),2)";
%if &nvars=0 %then %do;
%put %str(WARN)ING: Dataset &base_ds has no variables, will not be converted.;
%return;
%end;
/* get indexes */
proc sort
data=sashelp.vindex(
where=(upcase(libname)="%scan(%upcase(&base_ds),1)"
and upcase(memname)="%scan(%upcase(&base_ds),2)")
)
out=_data_;
by indxname indxpos;
run;
%local indexes;
data _null_;
set &syslast end=last;
if _n_=1 then call symputx('indexes','(index=(','l');
by indxname indxpos;
length vars $32767 nom uni $8;
retain vars;
if first.indxname then do;
idxcnt+1;
nom='';
uni='';
vars=name;
end;
else vars=catx(' ',vars,name);
if last.indxname then do;
if nomiss='yes' then nom='/nomiss';
if unique='yes' then uni='/unique';
call symputx('indexes'
,catx(' ',symget('indexes'),indxname,'=(',vars,')',nom,uni)
,'l');
end;
if last then call symputx('indexes',cats(symget('indexes'),'))'),'l');
run;
data;run;
%let setds=&syslast;
proc sql
%if %datatyp(&maxobs)=NUMERIC %then %do;
outobs=&maxobs;
%end;
;
create table &setds as select * from &base_ds
%if &random_sample=YES %then %do;
order by ranuni(42)
%end;
;
reset outobs=max;
create table datalines1 as
select name,type,length,varnum,format,label from dictionary.columns
where upcase(libname)="%upcase(%scan(&base_ds,1))"
and upcase(memname)="%upcase(%scan(&base_ds,2))";
/**
Due to long decimals cannot use best. format
So - use bestd. format and then use character functions to strip trailing
zeros, if NOT an integer or missing!! Cannot use int() as it upsets
note2err when there are missings.
resolved code = ifc( mod(coalesce(VARIABLE,0),1)=0
,put(VARIABLE,best32.)
,substrn(put(VARIABLE,bestd32.),1
,findc(put(VARIABLE,bestd32.),'0','TBK')));
**/
data datalines_2;
format dataline $32000.;
set datalines1 (where=(upcase(name) not in
('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM')));
if type='num' then dataline=
cats('ifc(mod(coalesce(',name,',0),1)=0
,put(',name,',best32.-l)
,substrn(put(',name,',bestd32.-l),1
,findc(put(',name,',bestd32.-l),"0","TBK")))');
/**
* binary data must be converted, to store in text format. It is identified
* by the presence of the $HEX keyword in the format.
*/
else if upcase(format)=:'$HEX' then
dataline=cats('put(trim(',name,'),',format,')');
/**
* There is no easy way to store line breaks in a cards file.
* To discuss this, use: https://github.com/sasjs/core/issues/80
* Removing all nonprintables with kw (keep writeable)
*/
else dataline=cats('compress(',name,', ,"kw")');
run;
proc sql noprint;
select dataline into: datalines separated by ',' from datalines_2;
%local
process_dttm_flg
valid_from_dttm_flg
valid_to_dttm_flg
;
%let process_dttm_flg = N;
%let valid_from_dttm_flg = N;
%let valid_to_dttm_flg = N;
data _null_;
set datalines1 ;
/* build attrib statement */
if type='char' then type2='$';
if strip(format) ne '' then format2=cats('format=',format);
if strip(label) ne '' then label2=cats('label=',quote(trim(label)));
str1=catx(' ',(put(name,$33.)||'length=')
,put(cats(type2,length),$7.)||format2,label2);
/* Build input statement */
if upcase(format)=:'$HEX' then type3=':'!!format;
else if type='char' then type3=':$char.';
str2=put(name,$33.)||type3;
if(upcase(name) = "PROCESSED_DTTM") then
call symputx("process_dttm_flg", "Y", "L");
if(upcase(name) = "VALID_FROM_DTTM") then
call symputx("valid_from_dttm_flg", "Y", "L");
if(upcase(name) = "VALID_TO_DTTM") then
call symputx("valid_to_dttm_flg", "Y", "L");
call symputx(cats("attrib_stmt_", put(_N_, 8.)), str1, "L");
call symputx(cats("input_stmt_", put(_N_, 8.))
, ifc(upcase(name) not in
('PROCESSED_DTTM','VALID_FROM_DTTM','VALID_TO_DTTM'), str2, ""), "L");
run;
data _null_;
file &cards_file. &outencoding lrecl=32767 termstr=nl &append;
length __attrib $32767;
if _n_=1 then do;
put '/**';
put ' @file';
put " @brief Datalines for %upcase(%scan(&base_ds,2)) dataset";
put " @details Generated by %nrstr(%%)mp_ds2cards()";
put " Source: https://github.com/sasjs/core";
put ' @cond ';
put '**/';
put "data &tgt_ds &indexes;";
put "attrib ";
%do i = 1 %to &nvars;
__attrib=symget("attrib_stmt_&i");
put __attrib;
%end;
put ";";
%if &process_dttm_flg. eq Y %then %do;
put 'retain PROCESSED_DTTM %sysfunc(datetime());';
%end;
%if &valid_from_dttm_flg. eq Y %then %do;
put 'retain VALID_FROM_DTTM &low_date;';
%end;
%if &valid_to_dttm_flg. eq Y %then %do;
put 'retain VALID_TO_DTTM &high_date;';
%end;
if __nobs=0 then do;
put 'call missing(of _all_);/* avoid uninitialised notes */';
put 'stop;';
put 'run;';
end;
else do;
put "infile cards dsd;";
put "input ";
%do i = 1 %to &nvars.;
%if(%length(&&input_stmt_&i..)) %then
put " &&input_stmt_&i..";
;
%end;
put ";";
put 'missing a b c d e f g h i j k l m n o p q r s t u v w x y z _;';
put "datalines4;";
end;
end;
set &setds end=__lastobs nobs=__nobs;
/* remove all formats for write purposes - some have long underlying decimals */
format _numeric_ best30.29;
length __dataline $32767;
__dataline=catq('cqtmb',&datalines);
put __dataline;
if __lastobs then do;
put ';;;;';
put 'run;';
put '/** @endcond **/';
stop;
end;
run;
proc sql;
drop table &setds;
quit;
%if &showlog=YES %then %do;
data _null_;
infile &cards_file lrecl=32767;
input;
put _infile_;
run;
%end;
%put NOTE: CARDS FILE SAVED IN:;
%put NOTE-;%put NOTE-;
%put NOTE- %sysfunc(dequote(&cards_file.));
%put NOTE-;%put NOTE-;
%mend mp_ds2cards;
/** @endcond **/