Skip to content

Commit

Permalink
mcxload -etc/-235: fix idx:val parse error and prevent --stream-mirror
Browse files Browse the repository at this point in the history
  • Loading branch information
micans committed Dec 16, 2021
1 parent cc13f96 commit 6fd2b6b
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 21 deletions.
14 changes: 12 additions & 2 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@


* ...

* Mcl source is now hosted on github.com/micans/mcl .
Compiling mcl now requires installation of cimfomfa, which provides the C
utility library libtingea.
Expand All @@ -14,6 +12,18 @@
practice has stopped. The mcl source tree contains a script that
will download both cimfomfa and mcl tar archives and compile both.

* Fixed bug in mcxarray in the case of NAs in data. The wrong N was used.
Additionally, in the presence of NAs the Spearman correlation coefficient
uses the ranks from the vectors pre-removal of the joint NA set. There is
now a mode --flexnasp that will re-compute ranks after removal of this
set in both vectors, separately for each pair of vectors.

* Fixed bug in mcxload -etc <file> --expect-values; a logical cascade
triggered the wrong parse branch.

* Fixed bug in mcxload -etc / -235 and others. It is no longer possible
to combine these with --stream-mirror. Use -ri instead.


Fri May 16 2014

Expand Down
28 changes: 21 additions & 7 deletions TODO
Original file line number Diff line number Diff line change
@@ -1,15 +1,29 @@

clxdo or other script to compute distances for line-based <LABEL> <CID> cluster format


! statsReset moved. valgrind it.

- mclxVectorDispatch why is matrix argument not const.
- unit test
mcxarray
mcxload
autogenerate. idempotency, some sort of invariant check.
mcxload issues/axes
- auto-extension of tabs
- keeping track of maximum index seen (symmetric/directed)
- strict / restrict / extend
- dropping/ignoring of data (restrict)
- labels vs numbers
- symmetric vs directed tabs
- symmetric vs directed edges
- values vs novalues
- lots of fixmes in code

- doc/{Makefile.am,index.in.azm} (mcxassemble, mclpipeline)

! statsReset moved. fresh mcl valgrind

- mclxVectorDispatch; matrix argument not const. This for e.g. mclxSymReduceDispatch,
but perhaps consider the latter and similar (knn reductions) for purge.

- update/point README compile instructions

- mcxarray unit tests
implement full re-compute of ranks in case of NAs + spearman?

- mcxsubs reimplement; mcxi or lex/yacc
- consider also tf()
Expand Down
2 changes: 1 addition & 1 deletion shed/buildit
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail
mkdir -p slate/build
cd slate/build

../../configure CFLAGS="-I$HOME/local/include" LDFLAGS=-L$HOME/local/lib --prefix=$HOME/local --enable-maintainer-mode
../../configure CFLAGS="-g -I$HOME/local/include" LDFLAGS=-L$HOME/local/lib --prefix=$HOME/local --enable-maintainer-mode
# ../../configure CFLAGS="-Wall -Wextra -std=gnu99 -pedantic -Wmissing-prototypes -Wstrict-prototypes -Wold-style-definition -fno-common -I$HOME/local/include -Wno-int-in-bool-context -Wno-unused-but-set-variable -Wno-unused-function -Wno-enum-compare -Wno-unused-parameter -Wno-unused-variable -Wno-sign-compare -Wno-misleading-indentation" LDFLAGS=-L$HOME/local/lib --prefix=$HOME/local --enable-maintainer-mode

make clean
Expand Down
21 changes: 14 additions & 7 deletions src/impala/stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,6 @@ const char* module = "mclxIOstreamIn";
#define MCLXIO_STREAM_CTAB_RO (MCLXIO_STREAM_CTAB_STRICT | MCLXIO_STREAM_CTAB_RESTRICT)
#define MCLXIO_STREAM_RTAB_RO (MCLXIO_STREAM_RTAB_STRICT | MCLXIO_STREAM_RTAB_RESTRICT)

#define MCLXIO_STREAM_ETCANY (MCLXIO_STREAM_ETC | MCLXIO_STREAM_ETC_AI | MCLXIO_STREAM_SIF)
#define MCLXIO_STREAM_235ANY (MCLXIO_STREAM_235 | MCLXIO_STREAM_235_AI)

#define DEBUG 0
#define DEBUG2 0
#define DEBUG3 0
Expand Down Expand Up @@ -312,6 +309,7 @@ static mcxstatus read_etc
; break
; }
state->etcbuf_ofs += n_char_read
;if(0)fprintf(stderr, "start found %lu\n", iface->x)
; if (iface->map_c->max_seen+1 < iface->x+1) /* note mixed-sign comparison */
iface->map_c->max_seen = iface->x
; state->x_prev = iface->x
Expand Down Expand Up @@ -357,11 +355,15 @@ static mcxstatus read_etc
; }

if (bits & (MCLXIO_STREAM_235_AI | MCLXIO_STREAM_235))
{ if
{
;if(0)fprintf(stderr, "what's buf it's [%s]\n", state->etcbuf->str+state->etcbuf_ofs);
if
( ( tryvalue
&& 2 != sscanf(state->etcbuf->str+state->etcbuf_ofs, "%lu:%lf%n", &(iface->y), value, &n_char_read)
)
|| 1 != sscanf(state->etcbuf->str+state->etcbuf_ofs, "%lu%n", &(iface->y), &n_char_read)
|| ( ! tryvalue
&& 1 != sscanf(state->etcbuf->str+state->etcbuf_ofs, "%lu%n", &(iface->y), &n_char_read)
)
)
{ char* s = state->etcbuf->str+state->etcbuf_ofs
; while(isspace((uchar) s[0]))
Expand All @@ -376,7 +378,7 @@ static mcxstatus read_etc
; }
else
{
;if(DEBUG3)fprintf(stdbug, "hit at %d\n", (int) state->etcbuf_ofs);
;if(DEBUG3)fprintf(stdbug, "hit at %d (value %f) (read %d)\n", (int) state->etcbuf_ofs, *value, (int)n_char_read);
state->etcbuf_ofs += n_char_read
; if (iface->map_r->max_seen+1 < iface->y+1) /* note mixed-sign comparison */
iface->map_r->max_seen = iface->y
Expand Down Expand Up @@ -814,8 +816,10 @@ static mclx* make_mx_from_pars
; dim i

; if (bits & MCLXIO_STREAM_235ANY)
{ if (streamer->cmax_235 > 0 && dc_max_seen < streamer->cmax_235 - 1)
{ if (streamer->cmax_235 > 0 && dc_max_seen+1 < streamer->cmax_235)
dc_max_seen = streamer->cmax_235-1
; if (streamer->rmax_235 > 0 && dr_max_seen+1 < streamer->rmax_235)
dr_max_seen = streamer->rmax_235-1
; }
else if (bits & MCLXIO_STREAM_123)
{ if (streamer->cmax_123 > 0 && dc_max_seen+1 < streamer->cmax_123)
Expand Down Expand Up @@ -1075,6 +1079,9 @@ mclx* mclxIOstreamIn
{ mcxErr(me, "x-extend fails")
; break
; }
/* fixme: this code should check somewhere that y is in range;
* right now we depend on caller not having set mirror in multi-column case
*/
if (mirror && mclpARextend(iface.pars+y, x, value))
{ mcxErr(me, "y-extend fails")
; break
Expand Down
4 changes: 4 additions & 0 deletions src/impala/stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ mcxstatus mclxIOstreamOut
/* Composite masks: the row-tab and column-tab variants of the RESTRICT
 * and STRICT bits OR-ed together, so both can be set or tested at once.
 */
#define MCLXIO_STREAM_GTAB_RESTRICT (MCLXIO_STREAM_RTAB_RESTRICT | MCLXIO_STREAM_CTAB_RESTRICT)
#define MCLXIO_STREAM_GTAB_STRICT (MCLXIO_STREAM_RTAB_STRICT | MCLXIO_STREAM_CTAB_STRICT)

/* Input-format families. ETCANY matches any of the ETC, ETC_AI or SIF
 * bits; 235ANY matches either the 235 or the 235_AI bit.
 */
#define MCLXIO_STREAM_ETCANY (MCLXIO_STREAM_ETC | MCLXIO_STREAM_ETC_AI | MCLXIO_STREAM_SIF)
#define MCLXIO_STREAM_235ANY (MCLXIO_STREAM_235 | MCLXIO_STREAM_235_AI)

/* Union of the two families above, i.e. every multi-column input format.
 * Test a bit field against this mask to find out whether any such format
 * was selected.
 */
#define MCLXIO_STREAM_MULTICOLUMN (MCLXIO_STREAM_ETCANY | MCLXIO_STREAM_235ANY)

/* In abc mode, it tries to separate on tab if it spots a tab;
* otherwise it separates on whitespace.
Expand All @@ -81,6 +84,7 @@ typedef struct
; dim cmax_123
; dim rmax_123
; dim cmax_235
; dim rmax_235
;
} mclxIOstreamer ;

Expand Down
26 changes: 22 additions & 4 deletions src/shmx/mcxload.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,24 +80,25 @@ enum
, MY_OPT_OUT_TABG
, MY_OPT_DMAX

, MY_OPT_STRICT_TABC = MY_OPT_OUT_TABG + 2
, MY_OPT_STRICT_TABC = MY_OPT_DMAX + 2
, MY_OPT_RESTRICT_TABC
, MY_OPT_EXTEND_TABC
, MY_OPT_CANONICALC
, MY_OPT_SCRUB_DOMC
, MY_OPT_OUT_TABC
, MY_OPT_CMAX
, MY_OPT_CREQUIRE_235
, MY_OPT_RREQUIRE_235

, MY_OPT_STRICT_TABR = MY_OPT_CMAX + 2
, MY_OPT_STRICT_TABR = MY_OPT_RREQUIRE_235 + 2
, MY_OPT_RESTRICT_TABR
, MY_OPT_EXTEND_TABR
, MY_OPT_SCRUB_DOMR
, MY_OPT_CANONICALR
, MY_OPT_OUT_TABR
, MY_OPT_RMAX

, MY_OPT_DEDUP = MY_OPT_OUT_TABR + 2
, MY_OPT_DEDUP = MY_OPT_RMAX + 2
, MY_OPT_STREAM_TRANSFORM
, MY_OPT_TRANSFORM
, MY_OPT_STREAM_LOG
Expand Down Expand Up @@ -278,6 +279,12 @@ mcxOptAnchor options[] =
, "<num>"
, "number of columns is set to <num> at least"
}
, { "-235-maxr"
, MCX_OPT_HASARG | MCX_OPT_HIDDEN
, MY_OPT_RREQUIRE_235
, "<num>"
, "number of rows is set to <num> at least"
}
, { "-extend-tab"
, MCX_OPT_HASARG
, MY_OPT_EXTEND_TABG
Expand Down Expand Up @@ -527,7 +534,8 @@ int main
; mclxIOstreamer streamer
; void (*merge)(void* ivp1, const void* ivp2) = NULL

; mcxbool symmetric = FALSE
; mcxbool symmetric = FALSE /* this means domains are the same (implicit or explicit) */
; mcxbool mirror = FALSE /* this means edges should be undirected */
; mcxbool transpose = FALSE
; mcxbool cleanup = FALSE
; mcxbool dowrite = TRUE
Expand All @@ -552,6 +560,7 @@ int main
; streamer.cmax_123 = 0
; streamer.rmax_123 = 0
; streamer.cmax_235 = 0
; streamer.rmax_235 = 0

; mcxLogLevel =
MCX_LOG_AGGR | MCX_LOG_MODULE | MCX_LOG_IO | MCX_LOG_GAUGE | MCX_LOG_WARN
Expand Down Expand Up @@ -649,6 +658,11 @@ int main
case MY_OPT_CREQUIRE_235
: streamer.cmax_235 = atoi(opt->val)
; break
;

case MY_OPT_RREQUIRE_235
: streamer.rmax_235 = atoi(opt->val)
; break
;

case MY_OPT_RMAX
Expand Down Expand Up @@ -856,6 +870,10 @@ int main
mcxOptFree(&opts)

; symmetric = bits_stream_other & MCLXIO_STREAM_SYMMETRIC
; mirror = bits_stream_other & MCLXIO_STREAM_MIRROR

; if (mirror && (bits_stream_input & MCLXIO_STREAM_MULTICOLUMN))
mcxDie(1, me, "symmetric mode not compatible with multi-column input formats")

; if ((xfusetabc || xfusetabr || xfcachetabc || xfcachetabr) && symmetric)
mcxDie(1, me, "(implied) symmetric mode precludes all tabc and tabr options")
Expand Down

0 comments on commit 6fd2b6b

Please sign in to comment.