Skip to content

Commit

Permalink
feat: mp_stripdiffs macro - closes #373
Browse files Browse the repository at this point in the history
  • Loading branch information
^ committed Apr 25, 2024
1 parent e616bc9 commit 556c7bd
Show file tree
Hide file tree
Showing 6 changed files with 579 additions and 11 deletions.
247 changes: 241 additions & 6 deletions all.sas
Original file line number Diff line number Diff line change
Expand Up @@ -5541,13 +5541,21 @@ data _null_;
header = cats(coalescec(varlabel(dsid,i),varnm),dlm);
%end;
%else %if &headerformat=SASJS %then %do;
if vartype(dsid,i)='C' then header=cats(varnm,':$char',varlen(dsid,i),'.');
vlen=varlen(dsid,i);
if vartype(dsid,i)='C' then header=cats(varnm,':$char',vlen,'.');
else do;
vfmt=coalescec(varfmt(dsid,i),'0');
fmttype=mcf_getfmttype(vfmt);
if fmttype='DATE' then header=cats(varnm,':date9.');
else if fmttype='DATETIME' then header=cats(varnm,':E8601DT26.6');
else if fmttype='TIME' then header=cats(varnm,':TIME12.');
/**
* there is not much point importing a short length numeric like this,
* eg with best4., as the resulting variable will still be stored as
* length 8. We need a length or format statement to ensure variable
* is creatd with the smaller length...
**/
else if vlen<8 then header=cats(varnm,':best',vlen,'.');
else header=cats(varnm,':best.');
end;
%end;
Expand All @@ -5574,6 +5582,7 @@ data _null_;
set &ds end=last;
%do i=1 %to &vcnt;
%let var=%scan(&varlist,&i);
%local vlen&i;
%if %mf_getvartype(&ds,&var)=C %then %do;
%let dsv1=%mf_getuniquename(prefix=csvcol1_);
%let dsv2=%mf_getuniquename(prefix=csvcol2_);
Expand Down Expand Up @@ -12466,6 +12475,7 @@ run;
@li mp_coretable.sas
@li mp_stackdiffs.test.sas
@li mp_storediffs.sas
@li mp_stripdiffs.sas

@todo The current approach assumes that a variable called KEY_HASH is not on
the base table. This part will need to be refactored (eg using
Expand Down Expand Up @@ -12924,6 +12934,7 @@ select distinct tgtvar_nm into: missvars separated by ' '
<h4> Related Macros </h4>
@li mp_stackdiffs.sas
@li mp_storediffs.test.sas
@li mp_stripdiffs.sas

@version 9.2
@author Allan Bowe
Expand Down Expand Up @@ -13043,7 +13054,7 @@ data &ds4;
run;

%if "&loadref"="0" %then %let loadref=%sysfunc(uuidgen());
%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime());
%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime(),8.6);
%let libds=%upcase(&libds);

/* join orig vals for modified & deleted */
Expand Down Expand Up @@ -13373,6 +13384,229 @@ run;

%mend mp_streamfile;
/**
@file
@brief Generates a stage dataset to revert diffs tracked in an audit table
@details A big benefit of tracking data changes in an audit table is that
those changes can be subsequently reverted if necessary!

This macro prepares a staging dataset containing those differences - eg for:

@li deleted rows - these are re-inserted
@li changed rows - differences are reverted
@li added rows - these are marked with `_____DELETE_THIS_RECORD_____="YES"`

These changes are NOT applied to the base table - a staging dataset is
simply prepared for an ETL process to action. In Data Controller, this
dataset is used directly as an input to the APPROVE process (so that the
reversion diffs can be reviewed prior to being applied).


@param [in] libds Base library.dataset (will not be modified). The library
must be assigned.
@param [in] loadref Unique identifier for the version to be reverted. This
change, plus ALL SUBSEQUENT CHANGES, will be reverted in the output table.
@param [in] difftable The dataset containing the diffs. Definition available
in mddl_dc_difftable.sas
@param [out] outds= (work.mp_stripdiffs) Output table containing the diffs.
Has the same format as the base datset, plus a
`_____DELETE_THIS_RECORD_____` variable.
@param [in] mdebug= set to 1 to enable DEBUG messages and preserve outputs

<h4> SAS Macros </h4>
@li mf_getuniquefileref.sas
@li mf_getuniquename.sas
@li mf_islibds.sas
@li mp_abort.sas

<h4> Related Macros </h4>
@li mddl_dc_difftable.sas
@li mp_stackdiffs.sas
@li mp_storediffs.sas
@li mp_stripdiffs.test.sas

@version 9.2
@author Allan Bowe
**/
/** @cond */

%macro mp_stripdiffs(libds
,loadref
,difftable
,outds=work.mp_stripdiffs
,mdebug=0
)/*/STORE SOURCE*/;
%local dbg;
%if &mdebug=1 %then %do;
%put &sysmacroname entry vars:;
%put _local_;
%end;
%else %let dbg=*;

%let libds=%upcase(&libds);

/* safety checks */
%mp_abort(iftrue= (&syscc ne 0)
,mac=&sysmacroname
,msg=%str(SYSCC=&syscc on entry. Clean session required!)
)
%let libds=%upcase(&libds);
%mp_abort(iftrue= (%mf_islibds(&libds)=0)
,mac=&sysmacroname
,msg=%str(Invalid library.dataset reference - %superq(libds))
)



/* set up unique and temporary vars */
%local ds1 ds2 ds3 ds4 ds5 fref1;
%let fref1=%mf_getuniquefileref();

/* get timestamp of the diff to be reverted */
%local ts;
proc sql noprint;
select put(processed_dttm,datetime19.6) into: ts
from &difftable where load_ref="&loadref";
%mp_abort(iftrue= (&sqlobs=0)
,mac=&sysmacroname
,msg=%str(Load ref %superq(loadref) not found!)
)

/* extract diffs for this base table from this timestamp onwards */
%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_diffs));
create table &ds1 (drop=libref dsn) as
select * from &difftable
where upcase(cats(libref))="%scan(&libds,1,.)"
and upcase(cats(dsn))="%scan(&libds,2,.)"
and processed_dttm ge "&ts"dt
order by processed_dttm desc, key_hash, is_pk;

/* extract key values only */
%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_pks));
create table &ds2 as
select key_hash,
tgtvar_nm,
tgtvar_type,
coalescec(oldval_char,newval_char) as charval,
coalesce(oldval_num, newval_num) as numval,
processed_dttm
from &ds1
where is_pk=1
order by key_hash, processed_dttm;

/* grab pk values */
%local pk;
data _null_;
set &ds2;
by key_hash;
call symputx('pk',catx(' ',symget('pk'),tgtvar_nm),'l');
if last.key_hash then stop;
run;

%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_keychar));
proc transpose data=&ds2(where=(tgtvar_type='C'))
out=&ds3(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var charval;
run;

%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_keynum));
proc transpose data=&ds2(where=(tgtvar_type='N'))
out=&ds4(drop=_name_);
by KEY_HASH;
id TGTVAR_NM;
var numval;
run;
/* shorten the lengths */
%mp_ds2squeeze(&ds3,outds=&ds3)
%mp_ds2squeeze(&ds4,outds=&ds4)

%let ds5=%upcase(work.%mf_getuniquename(prefix=mpsd_merged));
data &ds5;
merge &ds3 &ds4;
by key_hash;
if not missing(key_hash);
run;

/* join to base table for preliminary stage DS */
proc sql;
create table &outds as select "No " as _____DELETE_THIS_RECORD_____,
b.*
from &ds5 a
inner join &libds b
on 1=1
%do x=1 %to %sysfunc(countw(&pk,%str( )));
and a.%scan(&pk,&x,%str( ))=b.%scan(&pk,&x,%str( ))
%end;
;

/* create SAS code to apply to stage_ds */
data _null_;
set &ds1;
file &fref1;
if _n_=1 then put 'proc sql noprint;';
by descending processed_dttm key_hash is_pk;
if move_type='M' then do;
if first.key_hash then do;
put "update &outds set " @@;
end;
if IS_PK=0 then do;
put " " tgtvar_nm '=' @@;
charval=quote(cats(oldval_char));
if tgtvar_type='C' then put charval @@;
else put oldval_num @@;
if not last.is_pk then put ',';
end;
else do;
if first.is_pk then put " where 1=1 " @@;
put " and " tgtvar_nm '=' @@;
charval=quote(cats(oldval_char));
if tgtvar_type='C' then put charval @@;
else put oldval_num @@;
end;
end;
else if move_type='A' then do;
if first.key_hash then do;
put "update &outds set _____DELETE_THIS_RECORD_____='Yes' where 1=1 " @@;
end;
/* gating if - as only need PK now */
if is_pk=1;
put ' AND ' tgtvar_nm '=' @@;
charval=quote(cats(newval_char));
if tgtvar_type='C' then put charval @@;
else put newval_num @@;
end;
else if move_type='D' then do;
if first.key_hash then do;
put "insert into &outds set _____DELETE_THIS_RECORD_____='No' " @@;
end;
put " ," tgtvar_nm '=' @@;
charval=quote(cats(oldval_char));
if tgtvar_type='C' then put charval @@;
else put oldval_num @@;
end;
if last.key_hash then put ';';
run;

/* apply the modification statements */
%inc &fref1/source2;

%if &mdebug=0 %then %do;
proc sql;
drop table &ds1, &ds2, &ds3, &ds4, &ds5;
file &fref1 clear;
%end;
%else %do;
data _null_;
infile &fref1;
input;
if _n_=1 then putlog "Contents of SQL adjustments";
putlog _infile_;
run;
%end;

%mend mp_stripdiffs;
/** @endcond *//**
@file
@brief Runs arbitrary code for a specified amount of time
@details Executes a series of procs and data steps to enable performance
Expand Down Expand Up @@ -17964,10 +18198,11 @@ run;

@param [in] user= the metadata user to return groups for. Leave blank for all
groups.
@param [in] repo= the metadata repository that contains the user/group
information
@param [in] mDebug= set to 1 to show debug messages in the log
@param [out] outds= the dataset to create that contains the list of groups
@param [in] repo= (foundation) the metadata repository that contains the
user/group information
@param [in] mDebug= (0) set to 1 to show debug messages in the log
@param [out] outds= (work.mm_getgroups) The dataset to create that contains
the list of groups

@returns outds dataset containing all groups in a column named "metagroup"
- groupuri
Expand Down
1 change: 1 addition & 0 deletions base/mp_stackdiffs.sas
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@
@li mp_coretable.sas
@li mp_stackdiffs.test.sas
@li mp_storediffs.sas
@li mp_stripdiffs.sas
@todo The current approach assumes that a variable called KEY_HASH is not on
the base table. This part will need to be refactored (eg using
Expand Down
3 changes: 2 additions & 1 deletion base/mp_storediffs.sas
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
<h4> Related Macros </h4>
@li mp_stackdiffs.sas
@li mp_storediffs.test.sas
@li mp_stripdiffs.sas
@version 9.2
@author Allan Bowe
Expand Down Expand Up @@ -183,7 +184,7 @@ data &ds4;
run;

%if "&loadref"="0" %then %let loadref=%sysfunc(uuidgen());
%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime());
%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime(),8.6);
%let libds=%upcase(&libds);

/* join orig vals for modified & deleted */
Expand Down
Loading

0 comments on commit 556c7bd

Please sign in to comment.